diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,183693 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9777082518576456, + "eval_steps": 250, + "global_step": 10000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 9.777082518576456e-05, + "grad_norm": 110.51439781623274, + "learning_rate": 5e-05, + "loss": 4.7192, + "num_input_tokens_seen": 97784, + "step": 1 + }, + { + "epoch": 9.777082518576456e-05, + "loss": 4.813364028930664, + "loss_ce": 3.522348165512085, + "loss_iou": 0.78515625, + "loss_num": 0.2578125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 97784, + "step": 1 + }, + { + "epoch": 0.00019554165037152912, + "grad_norm": 118.91667740978241, + "learning_rate": 5e-05, + "loss": 4.5331, + "num_input_tokens_seen": 195548, + "step": 2 + }, + { + "epoch": 0.00019554165037152912, + "loss": 4.292417526245117, + "loss_ce": 3.400144577026367, + "loss_iou": 0.57421875, + "loss_num": 0.1787109375, + "loss_xval": 0.890625, + "num_input_tokens_seen": 195548, + "step": 2 + }, + { + "epoch": 0.0002933124755572937, + "grad_norm": 97.48278179270092, + "learning_rate": 5e-05, + "loss": 4.3226, + "num_input_tokens_seen": 292312, + "step": 3 + }, + { + "epoch": 0.0002933124755572937, + "loss": 4.459010601043701, + "loss_ce": 3.266139507293701, + "loss_iou": 0.6015625, + "loss_num": 0.23828125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 292312, + "step": 3 + }, + { + "epoch": 0.00039108330074305825, + "grad_norm": 136.88956214163665, + "learning_rate": 5e-05, + "loss": 3.6677, + "num_input_tokens_seen": 388632, + "step": 4 + }, + { + "epoch": 0.00039108330074305825, + "loss": 3.5020785331726074, + "loss_ce": 2.6031527519226074, + "loss_iou": 0.458984375, + "loss_num": 0.1796875, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 388632, + "step": 4 + }, + { + "epoch": 0.0004888541259288229, + "grad_norm": 96.51076359697171, + "learning_rate": 5e-05, + "loss": 2.9961, + "num_input_tokens_seen": 486216, + "step": 5 + }, + { + "epoch": 0.0004888541259288229, + "loss": 2.756664276123047, + "loss_ce": 2.007152557373047, + "loss_iou": 0.40234375, + "loss_num": 0.150390625, + "loss_xval": 0.75, + "num_input_tokens_seen": 486216, + "step": 5 + }, + { + "epoch": 0.0005866249511145874, + "grad_norm": 167.83763987782712, + "learning_rate": 5e-05, + "loss": 3.3937, + "num_input_tokens_seen": 583296, + "step": 6 + }, + { + "epoch": 0.0005866249511145874, + "loss": 3.429818630218506, + "loss_ce": 2.337533473968506, + "loss_iou": 0.609375, + "loss_num": 0.21875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 583296, + "step": 6 + }, + { + "epoch": 0.000684395776300352, + "grad_norm": 72.00488949768335, + "learning_rate": 5e-05, + "loss": 2.9919, + "num_input_tokens_seen": 679548, + "step": 7 + }, + { + "epoch": 0.000684395776300352, + "loss": 2.817655086517334, + "loss_ce": 2.076993465423584, + "loss_iou": 0.380859375, + "loss_num": 0.1474609375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 679548, + "step": 7 + }, + { + "epoch": 0.0007821666014861165, + "grad_norm": 53.57206168798684, + "learning_rate": 5e-05, + "loss": 2.5372, + "num_input_tokens_seen": 776952, + "step": 8 + }, + { + "epoch": 0.0007821666014861165, + "loss": 2.5461690425872803, + "loss_ce": 1.3835713863372803, + "loss_iou": 0.6015625, + "loss_num": 0.232421875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 776952, + "step": 8 + }, + { + "epoch": 0.0008799374266718811, + "grad_norm": 92.70871740757678, + "learning_rate": 5e-05, + "loss": 2.7161, + "num_input_tokens_seen": 874284, + "step": 9 + }, + { + "epoch": 0.0008799374266718811, + "loss": 2.735065221786499, + "loss_ce": 1.734088659286499, + "loss_iou": 0.6328125, + "loss_num": 0.2001953125, + "loss_xval": 1.0, + "num_input_tokens_seen": 874284, + "step": 9 + }, + { + "epoch": 0.0009777082518576457, + "grad_norm": 48.66944107898732, + "learning_rate": 5e-05, + "loss": 2.3773, + "num_input_tokens_seen": 971076, + "step": 10 + }, + { + "epoch": 0.0009777082518576457, + "loss": 2.329404830932617, + "loss_ce": 1.4412213563919067, + "loss_iou": 0.6328125, + "loss_num": 0.177734375, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 971076, + "step": 10 + }, + { + "epoch": 0.0010754790770434103, + "grad_norm": 28.875780883874373, + "learning_rate": 5e-05, + "loss": 1.8929, + "num_input_tokens_seen": 1068592, + "step": 11 + }, + { + "epoch": 0.0010754790770434103, + "loss": 1.8606908321380615, + "loss_ce": 0.9795874357223511, + "loss_iou": 0.5078125, + "loss_num": 0.17578125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 1068592, + "step": 11 + }, + { + "epoch": 0.0011732499022291747, + "grad_norm": 37.23738579753834, + "learning_rate": 5e-05, + "loss": 2.0449, + "num_input_tokens_seen": 1165232, + "step": 12 + }, + { + "epoch": 0.0011732499022291747, + "loss": 1.9545942544937134, + "loss_ce": 1.085453748703003, + "loss_iou": 0.625, + "loss_num": 0.173828125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 1165232, + "step": 12 + }, + { + "epoch": 0.0012710207274149394, + "grad_norm": 28.689150447617425, + "learning_rate": 5e-05, + "loss": 1.776, + "num_input_tokens_seen": 1261384, + "step": 13 + }, + { + "epoch": 0.0012710207274149394, + "loss": 1.7863218784332275, + "loss_ce": 0.9782164096832275, + "loss_iou": 0.443359375, + "loss_num": 0.1611328125, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 1261384, + "step": 13 + }, + { + "epoch": 0.001368791552600704, + "grad_norm": 22.43088046007296, + "learning_rate": 5e-05, + "loss": 1.6109, + "num_input_tokens_seen": 1357252, + "step": 14 + }, + { + "epoch": 0.001368791552600704, + "loss": 1.7011809349060059, + "loss_ce": 0.8305754661560059, + "loss_iou": 0.38671875, + "loss_num": 0.173828125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 1357252, + "step": 14 + }, + { + "epoch": 0.0014665623777864686, + "grad_norm": 26.53838801567624, + "learning_rate": 5e-05, + "loss": 1.7987, + "num_input_tokens_seen": 1454448, + "step": 15 + }, + { + "epoch": 0.0014665623777864686, + "loss": 1.848681926727295, + "loss_ce": 0.7466311454772949, + "loss_iou": 0.466796875, + "loss_num": 0.220703125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 1454448, + "step": 15 + }, + { + "epoch": 0.001564333202972233, + "grad_norm": 21.70907960038261, + "learning_rate": 5e-05, + "loss": 1.6902, + "num_input_tokens_seen": 1550852, + "step": 16 + }, + { + "epoch": 0.001564333202972233, + "loss": 1.751322627067566, + "loss_ce": 0.7210491895675659, + "loss_iou": 0.56640625, + "loss_num": 0.2060546875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 1550852, + "step": 16 + }, + { + "epoch": 0.0016621040281579976, + "grad_norm": 18.90096141741047, + "learning_rate": 5e-05, + "loss": 1.5482, + "num_input_tokens_seen": 1648840, + "step": 17 + }, + { + "epoch": 0.0016621040281579976, + "loss": 1.6046310663223267, + "loss_ce": 0.7352463006973267, + "loss_iou": 0.326171875, + "loss_num": 0.173828125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 1648840, + "step": 17 + }, + { + "epoch": 0.0017598748533437622, + "grad_norm": 16.91709372445221, + "learning_rate": 5e-05, + "loss": 1.4242, + "num_input_tokens_seen": 1746324, + "step": 18 + }, + { + "epoch": 0.0017598748533437622, + "loss": 1.3961834907531738, + "loss_ce": 0.639347493648529, + "loss_iou": 0.384765625, + "loss_num": 0.1513671875, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 1746324, + "step": 18 + }, + { + "epoch": 0.0018576456785295268, + "grad_norm": 17.435948780600402, + "learning_rate": 5e-05, + "loss": 1.1859, + "num_input_tokens_seen": 1843180, + "step": 19 + }, + { + "epoch": 0.0018576456785295268, + "loss": 1.1917815208435059, + "loss_ce": 0.5965667366981506, + "loss_iou": 0.1650390625, + "loss_num": 0.119140625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 1843180, + "step": 19 + }, + { + "epoch": 0.0019554165037152915, + "grad_norm": 15.779332847100175, + "learning_rate": 5e-05, + "loss": 1.2856, + "num_input_tokens_seen": 1941436, + "step": 20 + }, + { + "epoch": 0.0019554165037152915, + "loss": 1.2958950996398926, + "loss_ce": 0.5566372275352478, + "loss_iou": 0.11083984375, + "loss_num": 0.1474609375, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 1941436, + "step": 20 + }, + { + "epoch": 0.002053187328901056, + "grad_norm": 14.528800894338334, + "learning_rate": 5e-05, + "loss": 0.9766, + "num_input_tokens_seen": 2037772, + "step": 21 + }, + { + "epoch": 0.002053187328901056, + "loss": 0.996512770652771, + "loss_ce": 0.491874098777771, + "loss_iou": 0.10400390625, + "loss_num": 0.10107421875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 2037772, + "step": 21 + }, + { + "epoch": 0.0021509581540868207, + "grad_norm": 15.218897996275974, + "learning_rate": 5e-05, + "loss": 1.0613, + "num_input_tokens_seen": 2133432, + "step": 22 + }, + { + "epoch": 0.0021509581540868207, + "loss": 1.0437952280044556, + "loss_ce": 0.45981085300445557, + "loss_iou": 0.15234375, + "loss_num": 0.11669921875, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 2133432, + "step": 22 + }, + { + "epoch": 0.002248728979272585, + "grad_norm": 17.212498450650557, + "learning_rate": 5e-05, + "loss": 1.0908, + "num_input_tokens_seen": 2231440, + "step": 23 + }, + { + "epoch": 0.002248728979272585, + "loss": 1.0651339292526245, + "loss_ce": 0.4413546621799469, + "loss_iou": 0.21484375, + "loss_num": 0.125, + "loss_xval": 0.625, + "num_input_tokens_seen": 2231440, + "step": 23 + }, + { + "epoch": 0.0023464998044583495, + "grad_norm": 19.101899625855186, + "learning_rate": 5e-05, + "loss": 1.0153, + "num_input_tokens_seen": 2328472, + "step": 24 + }, + { + "epoch": 0.0023464998044583495, + "loss": 0.9756933450698853, + "loss_ce": 0.40647947788238525, + "loss_iou": 0.1162109375, + "loss_num": 0.11376953125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 2328472, + "step": 24 + }, + { + "epoch": 0.0024442706296441143, + "grad_norm": 23.291011113081648, + "learning_rate": 5e-05, + "loss": 1.0657, + "num_input_tokens_seen": 2426148, + "step": 25 + }, + { + "epoch": 0.0024442706296441143, + "loss": 1.0886074304580688, + "loss_ce": 0.3598476052284241, + "loss_iou": 0.275390625, + "loss_num": 0.1455078125, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 2426148, + "step": 25 + }, + { + "epoch": 0.0025420414548298787, + "grad_norm": 24.20170425366921, + "learning_rate": 5e-05, + "loss": 1.3332, + "num_input_tokens_seen": 2523224, + "step": 26 + }, + { + "epoch": 0.0025420414548298787, + "loss": 1.3531520366668701, + "loss_ce": 0.3941676616668701, + "loss_iou": 0.1875, + "loss_num": 0.19140625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 2523224, + "step": 26 + }, + { + "epoch": 0.0026398122800156435, + "grad_norm": 16.707365201189738, + "learning_rate": 5e-05, + "loss": 1.0934, + "num_input_tokens_seen": 2620264, + "step": 27 + }, + { + "epoch": 0.0026398122800156435, + "loss": 1.1080657243728638, + "loss_ce": 0.43716728687286377, + "loss_iou": 0.0712890625, + "loss_num": 0.1337890625, + "loss_xval": 0.671875, + "num_input_tokens_seen": 2620264, + "step": 27 + }, + { + "epoch": 0.002737583105201408, + "grad_norm": 15.776772427194194, + "learning_rate": 5e-05, + "loss": 0.7933, + "num_input_tokens_seen": 2717336, + "step": 28 + }, + { + "epoch": 0.002737583105201408, + "loss": 0.844190776348114, + "loss_ce": 0.310011088848114, + "loss_iou": 0.138671875, + "loss_num": 0.10693359375, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 2717336, + "step": 28 + }, + { + "epoch": 0.0028353539303871723, + "grad_norm": 16.74291222422733, + "learning_rate": 5e-05, + "loss": 0.8785, + "num_input_tokens_seen": 2814016, + "step": 29 + }, + { + "epoch": 0.0028353539303871723, + "loss": 1.0115916728973389, + "loss_ce": 0.23620110750198364, + "loss_iou": 0.3125, + "loss_num": 0.1552734375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 2814016, + "step": 29 + }, + { + "epoch": 0.002933124755572937, + "grad_norm": 17.946990416124436, + "learning_rate": 5e-05, + "loss": 0.8102, + "num_input_tokens_seen": 2910704, + "step": 30 + }, + { + "epoch": 0.002933124755572937, + "loss": 0.7171972393989563, + "loss_ce": 0.1842382848262787, + "loss_iou": 0.166015625, + "loss_num": 0.1064453125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 2910704, + "step": 30 + }, + { + "epoch": 0.0030308955807587016, + "grad_norm": 17.955651636813876, + "learning_rate": 5e-05, + "loss": 0.8875, + "num_input_tokens_seen": 3007564, + "step": 31 + }, + { + "epoch": 0.0030308955807587016, + "loss": 0.7981342077255249, + "loss_ce": 0.1682514250278473, + "loss_iou": 0.361328125, + "loss_num": 0.1259765625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 3007564, + "step": 31 + }, + { + "epoch": 0.003128666405944466, + "grad_norm": 14.038033720904824, + "learning_rate": 5e-05, + "loss": 0.7544, + "num_input_tokens_seen": 3104364, + "step": 32 + }, + { + "epoch": 0.003128666405944466, + "loss": 0.7309492230415344, + "loss_ce": 0.14330272376537323, + "loss_iou": 0.09375, + "loss_num": 0.11767578125, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 3104364, + "step": 32 + }, + { + "epoch": 0.003226437231130231, + "grad_norm": 9.873283135632306, + "learning_rate": 5e-05, + "loss": 0.6759, + "num_input_tokens_seen": 3201716, + "step": 33 + }, + { + "epoch": 0.003226437231130231, + "loss": 0.7045077681541443, + "loss_ce": 0.1136874407529831, + "loss_iou": 0.130859375, + "loss_num": 0.1181640625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 3201716, + "step": 33 + }, + { + "epoch": 0.003324208056315995, + "grad_norm": 8.389786223551797, + "learning_rate": 5e-05, + "loss": 0.5684, + "num_input_tokens_seen": 3298896, + "step": 34 + }, + { + "epoch": 0.003324208056315995, + "loss": 0.542425274848938, + "loss_ce": 0.09296238422393799, + "loss_iou": 0.09619140625, + "loss_num": 0.08984375, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 3298896, + "step": 34 + }, + { + "epoch": 0.00342197888150176, + "grad_norm": 7.659318331402723, + "learning_rate": 5e-05, + "loss": 0.477, + "num_input_tokens_seen": 3396268, + "step": 35 + }, + { + "epoch": 0.00342197888150176, + "loss": 0.4835737347602844, + "loss_ce": 0.07085400819778442, + "loss_iou": 0.13671875, + "loss_num": 0.08251953125, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 3396268, + "step": 35 + }, + { + "epoch": 0.0035197497066875244, + "grad_norm": 8.362359431295095, + "learning_rate": 5e-05, + "loss": 0.4467, + "num_input_tokens_seen": 3493344, + "step": 36 + }, + { + "epoch": 0.0035197497066875244, + "loss": 0.40446656942367554, + "loss_ce": 0.05131717026233673, + "loss_iou": 0.12353515625, + "loss_num": 0.07080078125, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 3493344, + "step": 36 + }, + { + "epoch": 0.003617520531873289, + "grad_norm": 9.990518206208932, + "learning_rate": 5e-05, + "loss": 0.7724, + "num_input_tokens_seen": 3589980, + "step": 37 + }, + { + "epoch": 0.003617520531873289, + "loss": 0.7181349992752075, + "loss_ce": 0.05016622692346573, + "loss_iou": 0.177734375, + "loss_num": 0.1337890625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 3589980, + "step": 37 + }, + { + "epoch": 0.0037152913570590537, + "grad_norm": 7.9254358378303715, + "learning_rate": 5e-05, + "loss": 0.4592, + "num_input_tokens_seen": 3687332, + "step": 38 + }, + { + "epoch": 0.0037152913570590537, + "loss": 0.45856034755706787, + "loss_ce": 0.03961503133177757, + "loss_iou": 0.061279296875, + "loss_num": 0.08349609375, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 3687332, + "step": 38 + }, + { + "epoch": 0.003813062182244818, + "grad_norm": 8.36642647923803, + "learning_rate": 5e-05, + "loss": 0.4889, + "num_input_tokens_seen": 3784948, + "step": 39 + }, + { + "epoch": 0.003813062182244818, + "loss": 0.5183839797973633, + "loss_ce": 0.03913590684533119, + "loss_iou": 0.1669921875, + "loss_num": 0.095703125, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 3784948, + "step": 39 + }, + { + "epoch": 0.003910833007430583, + "grad_norm": 9.836266998928965, + "learning_rate": 5e-05, + "loss": 0.3658, + "num_input_tokens_seen": 3882012, + "step": 40 + }, + { + "epoch": 0.003910833007430583, + "loss": 0.32836976647377014, + "loss_ce": 0.02916017174720764, + "loss_iou": 0.1484375, + "loss_num": 0.059814453125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 3882012, + "step": 40 + }, + { + "epoch": 0.004008603832616347, + "grad_norm": 5.236891732603085, + "learning_rate": 5e-05, + "loss": 0.4454, + "num_input_tokens_seen": 3978748, + "step": 41 + }, + { + "epoch": 0.004008603832616347, + "loss": 0.4374915063381195, + "loss_ce": 0.03978642076253891, + "loss_iou": 0.109375, + "loss_num": 0.07958984375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 3978748, + "step": 41 + }, + { + "epoch": 0.004106374657802112, + "grad_norm": 5.128856533405159, + "learning_rate": 5e-05, + "loss": 0.3608, + "num_input_tokens_seen": 4076280, + "step": 42 + }, + { + "epoch": 0.004106374657802112, + "loss": 0.38873034715652466, + "loss_ce": 0.02874501422047615, + "loss_iou": 0.1875, + "loss_num": 0.07177734375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 4076280, + "step": 42 + }, + { + "epoch": 0.0042041454829878765, + "grad_norm": 4.744474491869756, + "learning_rate": 5e-05, + "loss": 0.3614, + "num_input_tokens_seen": 4172840, + "step": 43 + }, + { + "epoch": 0.0042041454829878765, + "loss": 0.3065931797027588, + "loss_ce": 0.016371015459299088, + "loss_iou": 0.06494140625, + "loss_num": 0.05810546875, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 4172840, + "step": 43 + }, + { + "epoch": 0.004301916308173641, + "grad_norm": 6.990674881046719, + "learning_rate": 5e-05, + "loss": 0.3895, + "num_input_tokens_seen": 4269944, + "step": 44 + }, + { + "epoch": 0.004301916308173641, + "loss": 0.39119070768356323, + "loss_ce": 0.01753345876932144, + "loss_iou": 0.1240234375, + "loss_num": 0.07470703125, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 4269944, + "step": 44 + }, + { + "epoch": 0.004399687133359405, + "grad_norm": 12.350004258103818, + "learning_rate": 5e-05, + "loss": 0.4938, + "num_input_tokens_seen": 4367800, + "step": 45 + }, + { + "epoch": 0.004399687133359405, + "loss": 0.5299053192138672, + "loss_ce": 0.01525692455470562, + "loss_iou": 0.353515625, + "loss_num": 0.10302734375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 4367800, + "step": 45 + }, + { + "epoch": 0.00449745795854517, + "grad_norm": 10.203539122860395, + "learning_rate": 5e-05, + "loss": 0.5189, + "num_input_tokens_seen": 4463980, + "step": 46 + }, + { + "epoch": 0.00449745795854517, + "loss": 0.5415012836456299, + "loss_ce": 0.019528597593307495, + "loss_iou": 0.1171875, + "loss_num": 0.1044921875, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 4463980, + "step": 46 + }, + { + "epoch": 0.004595228783730935, + "grad_norm": 17.176115671680524, + "learning_rate": 5e-05, + "loss": 0.363, + "num_input_tokens_seen": 4561088, + "step": 47 + }, + { + "epoch": 0.004595228783730935, + "loss": 0.32323384284973145, + "loss_ce": 0.0156776811927557, + "loss_iou": 0.2294921875, + "loss_num": 0.0615234375, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 4561088, + "step": 47 + }, + { + "epoch": 0.004692999608916699, + "grad_norm": 3.055343474915769, + "learning_rate": 5e-05, + "loss": 0.376, + "num_input_tokens_seen": 4658620, + "step": 48 + }, + { + "epoch": 0.004692999608916699, + "loss": 0.33340150117874146, + "loss_ce": 0.014553835615515709, + "loss_iou": 0.22265625, + "loss_num": 0.06396484375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 4658620, + "step": 48 + }, + { + "epoch": 0.004790770434102464, + "grad_norm": 3.233801529747645, + "learning_rate": 5e-05, + "loss": 0.4203, + "num_input_tokens_seen": 4756208, + "step": 49 + }, + { + "epoch": 0.004790770434102464, + "loss": 0.5090059638023376, + "loss_ce": 0.015719860792160034, + "loss_iou": 0.1845703125, + "loss_num": 0.0986328125, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 4756208, + "step": 49 + }, + { + "epoch": 0.004888541259288229, + "grad_norm": 4.194816048146741, + "learning_rate": 5e-05, + "loss": 0.3951, + "num_input_tokens_seen": 4853844, + "step": 50 + }, + { + "epoch": 0.004888541259288229, + "loss": 0.4015321731567383, + "loss_ce": 0.0116396090015769, + "loss_iou": 0.10791015625, + "loss_num": 0.078125, + "loss_xval": 0.390625, + "num_input_tokens_seen": 4853844, + "step": 50 + }, + { + "epoch": 0.004986312084473993, + "grad_norm": 10.298311847244314, + "learning_rate": 5e-05, + "loss": 0.3828, + "num_input_tokens_seen": 4950424, + "step": 51 + }, + { + "epoch": 0.004986312084473993, + "loss": 0.383816659450531, + "loss_ce": 0.01479809544980526, + "loss_iou": 0.1572265625, + "loss_num": 0.07373046875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 4950424, + "step": 51 + }, + { + "epoch": 0.005084082909659757, + "grad_norm": 8.611909687720958, + "learning_rate": 5e-05, + "loss": 0.4101, + "num_input_tokens_seen": 5048212, + "step": 52 + }, + { + "epoch": 0.005084082909659757, + "loss": 0.4313870668411255, + "loss_ce": 0.009145854040980339, + "loss_iou": 0.08935546875, + "loss_num": 0.08447265625, + "loss_xval": 0.421875, + "num_input_tokens_seen": 5048212, + "step": 52 + }, + { + "epoch": 0.005181853734845522, + "grad_norm": 5.103075811010373, + "learning_rate": 5e-05, + "loss": 0.3295, + "num_input_tokens_seen": 5145276, + "step": 53 + }, + { + "epoch": 0.005181853734845522, + "loss": 0.3337019681930542, + "loss_ce": 0.010337729007005692, + "loss_iou": 0.06591796875, + "loss_num": 0.064453125, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 5145276, + "step": 53 + }, + { + "epoch": 0.005279624560031287, + "grad_norm": 5.059193676978871, + "learning_rate": 5e-05, + "loss": 0.3542, + "num_input_tokens_seen": 5241020, + "step": 54 + }, + { + "epoch": 0.005279624560031287, + "loss": 0.3685401678085327, + "loss_ce": 0.01105724647641182, + "loss_iou": 0.1015625, + "loss_num": 0.0712890625, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 5241020, + "step": 54 + }, + { + "epoch": 0.005377395385217051, + "grad_norm": 2.9734377393813527, + "learning_rate": 5e-05, + "loss": 0.3939, + "num_input_tokens_seen": 5339036, + "step": 55 + }, + { + "epoch": 0.005377395385217051, + "loss": 0.38415804505348206, + "loss_ce": 0.014040855690836906, + "loss_iou": 0.07470703125, + "loss_num": 0.07421875, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 5339036, + "step": 55 + }, + { + "epoch": 0.005475166210402816, + "grad_norm": 4.166038397792259, + "learning_rate": 5e-05, + "loss": 0.3322, + "num_input_tokens_seen": 5436092, + "step": 56 + }, + { + "epoch": 0.005475166210402816, + "loss": 0.3338373601436615, + "loss_ce": 0.012426222674548626, + "loss_iou": 0.1650390625, + "loss_num": 0.064453125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 5436092, + "step": 56 + }, + { + "epoch": 0.005572937035588581, + "grad_norm": 6.315615273332558, + "learning_rate": 5e-05, + "loss": 0.3337, + "num_input_tokens_seen": 5533364, + "step": 57 + }, + { + "epoch": 0.005572937035588581, + "loss": 0.33251386880874634, + "loss_ce": 0.010126161389052868, + "loss_iou": 0.08642578125, + "loss_num": 0.064453125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 5533364, + "step": 57 + }, + { + "epoch": 0.005670707860774345, + "grad_norm": 6.237361640318922, + "learning_rate": 5e-05, + "loss": 0.392, + "num_input_tokens_seen": 5630756, + "step": 58 + }, + { + "epoch": 0.005670707860774345, + "loss": 0.4371787905693054, + "loss_ce": 0.011885832995176315, + "loss_iou": 0.09228515625, + "loss_num": 0.0849609375, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 5630756, + "step": 58 + }, + { + "epoch": 0.0057684786859601095, + "grad_norm": 4.253981085933397, + "learning_rate": 5e-05, + "loss": 0.2807, + "num_input_tokens_seen": 5727936, + "step": 59 + }, + { + "epoch": 0.0057684786859601095, + "loss": 0.2514991760253906, + "loss_ce": 0.00564955547451973, + "loss_iou": 0.18359375, + "loss_num": 0.04931640625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 5727936, + "step": 59 + }, + { + "epoch": 0.005866249511145874, + "grad_norm": 5.594111287592901, + "learning_rate": 5e-05, + "loss": 0.3049, + "num_input_tokens_seen": 5825032, + "step": 60 + }, + { + "epoch": 0.005866249511145874, + "loss": 0.3476724922657013, + "loss_ce": 0.015763314440846443, + "loss_iou": 0.2099609375, + "loss_num": 0.06640625, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 5825032, + "step": 60 + }, + { + "epoch": 0.005964020336331638, + "grad_norm": 4.558807862682041, + "learning_rate": 5e-05, + "loss": 0.2773, + "num_input_tokens_seen": 5921632, + "step": 61 + }, + { + "epoch": 0.005964020336331638, + "loss": 0.30556485056877136, + "loss_ce": 0.009239151142537594, + "loss_iou": 0.1533203125, + "loss_num": 0.059326171875, + "loss_xval": 0.296875, + "num_input_tokens_seen": 5921632, + "step": 61 + }, + { + "epoch": 0.006061791161517403, + "grad_norm": 7.629697263017113, + "learning_rate": 5e-05, + "loss": 0.3064, + "num_input_tokens_seen": 6018616, + "step": 62 + }, + { + "epoch": 0.006061791161517403, + "loss": 0.32375264167785645, + "loss_ce": 0.011008486151695251, + "loss_iou": 0.20703125, + "loss_num": 0.0625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 6018616, + "step": 62 + }, + { + "epoch": 0.006159561986703168, + "grad_norm": 8.11577319448297, + "learning_rate": 5e-05, + "loss": 0.5703, + "num_input_tokens_seen": 6116572, + "step": 63 + }, + { + "epoch": 0.006159561986703168, + "loss": 0.6277604103088379, + "loss_ce": 0.007155001629143953, + "loss_iou": 0.039794921875, + "loss_num": 0.1240234375, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 6116572, + "step": 63 + }, + { + "epoch": 0.006257332811888932, + "grad_norm": 6.716883239748546, + "learning_rate": 5e-05, + "loss": 0.3169, + "num_input_tokens_seen": 6212964, + "step": 64 + }, + { + "epoch": 0.006257332811888932, + "loss": 0.30473726987838745, + "loss_ce": 0.007129846140742302, + "loss_iou": 0.07568359375, + "loss_num": 0.0595703125, + "loss_xval": 0.296875, + "num_input_tokens_seen": 6212964, + "step": 64 + }, + { + "epoch": 0.006355103637074697, + "grad_norm": 9.887179923529969, + "learning_rate": 5e-05, + "loss": 0.2513, + "num_input_tokens_seen": 6309908, + "step": 65 + }, + { + "epoch": 0.006355103637074697, + "loss": 0.30165478587150574, + "loss_ce": 0.007526384200900793, + "loss_iou": 0.04736328125, + "loss_num": 0.058837890625, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 6309908, + "step": 65 + }, + { + "epoch": 0.006452874462260462, + "grad_norm": 6.680402491641383, + "learning_rate": 5e-05, + "loss": 0.3173, + "num_input_tokens_seen": 6407212, + "step": 66 + }, + { + "epoch": 0.006452874462260462, + "loss": 0.32441699504852295, + "loss_ce": 0.013381856493651867, + "loss_iou": 0.201171875, + "loss_num": 0.062255859375, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 6407212, + "step": 66 + }, + { + "epoch": 0.0065506452874462264, + "grad_norm": 5.8088376157777475, + "learning_rate": 5e-05, + "loss": 0.2714, + "num_input_tokens_seen": 6504956, + "step": 67 + }, + { + "epoch": 0.0065506452874462264, + "loss": 0.28717976808547974, + "loss_ce": 0.008127041161060333, + "loss_iou": 0.15625, + "loss_num": 0.055908203125, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 6504956, + "step": 67 + }, + { + "epoch": 0.00664841611263199, + "grad_norm": 6.4613822878455, + "learning_rate": 5e-05, + "loss": 0.2869, + "num_input_tokens_seen": 6601988, + "step": 68 + }, + { + "epoch": 0.00664841611263199, + "loss": 0.2848471701145172, + "loss_ce": 0.008357915095984936, + "loss_iou": 0.26171875, + "loss_num": 0.05517578125, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 6601988, + "step": 68 + }, + { + "epoch": 0.006746186937817755, + "grad_norm": 6.2664355350301975, + "learning_rate": 5e-05, + "loss": 0.3129, + "num_input_tokens_seen": 6699096, + "step": 69 + }, + { + "epoch": 0.006746186937817755, + "loss": 0.396085262298584, + "loss_ce": 0.007657552603632212, + "loss_iou": 0.267578125, + "loss_num": 0.07763671875, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 6699096, + "step": 69 + }, + { + "epoch": 0.00684395776300352, + "grad_norm": 7.707132023783147, + "learning_rate": 5e-05, + "loss": 0.2784, + "num_input_tokens_seen": 6796368, + "step": 70 + }, + { + "epoch": 0.00684395776300352, + "loss": 0.27967894077301025, + "loss_ce": 0.008804923854768276, + "loss_iou": 0.28125, + "loss_num": 0.05419921875, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 6796368, + "step": 70 + }, + { + "epoch": 0.006941728588189284, + "grad_norm": 9.066953786750226, + "learning_rate": 5e-05, + "loss": 0.3447, + "num_input_tokens_seen": 6893064, + "step": 71 + }, + { + "epoch": 0.006941728588189284, + "loss": 0.369411438703537, + "loss_ce": 0.009181949310004711, + "loss_iou": 0.498046875, + "loss_num": 0.072265625, + "loss_xval": 0.359375, + "num_input_tokens_seen": 6893064, + "step": 71 + }, + { + "epoch": 0.007039499413375049, + "grad_norm": 7.578595734656564, + "learning_rate": 5e-05, + "loss": 0.3002, + "num_input_tokens_seen": 6989836, + "step": 72 + }, + { + "epoch": 0.007039499413375049, + "loss": 0.32048672437667847, + "loss_ce": 0.0075594959780573845, + "loss_iou": 0.07763671875, + "loss_num": 0.0625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 6989836, + "step": 72 + }, + { + "epoch": 0.007137270238560814, + "grad_norm": 6.585272080508882, + "learning_rate": 5e-05, + "loss": 0.368, + "num_input_tokens_seen": 7086588, + "step": 73 + }, + { + "epoch": 0.007137270238560814, + "loss": 0.3698171079158783, + "loss_ce": 0.01233419869095087, + "loss_iou": 0.0986328125, + "loss_num": 0.0712890625, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 7086588, + "step": 73 + }, + { + "epoch": 0.007235041063746578, + "grad_norm": 3.5691365689975223, + "learning_rate": 5e-05, + "loss": 0.3836, + "num_input_tokens_seen": 7183996, + "step": 74 + }, + { + "epoch": 0.007235041063746578, + "loss": 0.34225887060165405, + "loss_ce": 0.00741998665034771, + "loss_iou": 0.203125, + "loss_num": 0.06689453125, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 7183996, + "step": 74 + }, + { + "epoch": 0.0073328118889323425, + "grad_norm": 3.821346443592607, + "learning_rate": 5e-05, + "loss": 0.2885, + "num_input_tokens_seen": 7280876, + "step": 75 + }, + { + "epoch": 0.0073328118889323425, + "loss": 0.30185002088546753, + "loss_ce": 0.007538496516644955, + "loss_iou": 0.2314453125, + "loss_num": 0.058837890625, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 7280876, + "step": 75 + }, + { + "epoch": 0.007430582714118107, + "grad_norm": 4.75899208613689, + "learning_rate": 5e-05, + "loss": 0.2974, + "num_input_tokens_seen": 7377096, + "step": 76 + }, + { + "epoch": 0.007430582714118107, + "loss": 0.33425086736679077, + "loss_ce": 0.00893348827958107, + "loss_iou": 0.16796875, + "loss_num": 0.06494140625, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 7377096, + "step": 76 + }, + { + "epoch": 0.007528353539303871, + "grad_norm": 5.384872247759784, + "learning_rate": 5e-05, + "loss": 0.2379, + "num_input_tokens_seen": 7473880, + "step": 77 + }, + { + "epoch": 0.007528353539303871, + "loss": 0.20201191306114197, + "loss_ce": 0.0067604463547468185, + "loss_iou": 0.16796875, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 7473880, + "step": 77 + }, + { + "epoch": 0.007626124364489636, + "grad_norm": 5.73101195415809, + "learning_rate": 5e-05, + "loss": 0.3055, + "num_input_tokens_seen": 7569988, + "step": 78 + }, + { + "epoch": 0.007626124364489636, + "loss": 0.2872313857078552, + "loss_ce": 0.007568285800516605, + "loss_iou": 0.11279296875, + "loss_num": 0.055908203125, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 7569988, + "step": 78 + }, + { + "epoch": 0.007723895189675401, + "grad_norm": 6.884181031227646, + "learning_rate": 5e-05, + "loss": 0.3134, + "num_input_tokens_seen": 7667008, + "step": 79 + }, + { + "epoch": 0.007723895189675401, + "loss": 0.3000621795654297, + "loss_ce": 0.012464523315429688, + "loss_iou": 0.1982421875, + "loss_num": 0.0576171875, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 7667008, + "step": 79 + }, + { + "epoch": 0.007821666014861166, + "grad_norm": 4.406345471184927, + "learning_rate": 5e-05, + "loss": 0.2523, + "num_input_tokens_seen": 7763800, + "step": 80 + }, + { + "epoch": 0.007821666014861166, + "loss": 0.2133225053548813, + "loss_ce": 0.009526122361421585, + "loss_iou": 0.228515625, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 7763800, + "step": 80 + }, + { + "epoch": 0.00791943684004693, + "grad_norm": 13.384436506965116, + "learning_rate": 5e-05, + "loss": 0.2278, + "num_input_tokens_seen": 7860496, + "step": 81 + }, + { + "epoch": 0.00791943684004693, + "loss": 0.2087518870830536, + "loss_ce": 0.007580003701150417, + "loss_iou": 0.2373046875, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 7860496, + "step": 81 + }, + { + "epoch": 0.008017207665232694, + "grad_norm": 8.773494324054878, + "learning_rate": 5e-05, + "loss": 0.3714, + "num_input_tokens_seen": 7958352, + "step": 82 + }, + { + "epoch": 0.008017207665232694, + "loss": 0.3536292314529419, + "loss_ce": 0.00658334419131279, + "loss_iou": 0.1689453125, + "loss_num": 0.0693359375, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 7958352, + "step": 82 + }, + { + "epoch": 0.00811497849041846, + "grad_norm": 4.917684369889504, + "learning_rate": 5e-05, + "loss": 0.277, + "num_input_tokens_seen": 8055668, + "step": 83 + }, + { + "epoch": 0.00811497849041846, + "loss": 0.3178722858428955, + "loss_ce": 0.013428924605250359, + "loss_iou": 0.1513671875, + "loss_num": 0.06103515625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 8055668, + "step": 83 + }, + { + "epoch": 0.008212749315604223, + "grad_norm": 3.766145189673827, + "learning_rate": 5e-05, + "loss": 0.3043, + "num_input_tokens_seen": 8152660, + "step": 84 + }, + { + "epoch": 0.008212749315604223, + "loss": 0.3658329248428345, + "loss_ce": 0.011523844674229622, + "loss_iou": 0.1962890625, + "loss_num": 0.07080078125, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 8152660, + "step": 84 + }, + { + "epoch": 0.008310520140789989, + "grad_norm": 4.788710084623214, + "learning_rate": 5e-05, + "loss": 0.2232, + "num_input_tokens_seen": 8250012, + "step": 85 + }, + { + "epoch": 0.008310520140789989, + "loss": 0.2299194186925888, + "loss_ce": 0.005798330996185541, + "loss_iou": 0.1845703125, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 8250012, + "step": 85 + }, + { + "epoch": 0.008408290965975753, + "grad_norm": 4.027776659349719, + "learning_rate": 5e-05, + "loss": 0.1872, + "num_input_tokens_seen": 8347272, + "step": 86 + }, + { + "epoch": 0.008408290965975753, + "loss": 0.2020183652639389, + "loss_ce": 0.006965262815356255, + "loss_iou": 0.265625, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 8347272, + "step": 86 + }, + { + "epoch": 0.008506061791161517, + "grad_norm": 4.7492087019289615, + "learning_rate": 5e-05, + "loss": 0.2972, + "num_input_tokens_seen": 8444896, + "step": 87 + }, + { + "epoch": 0.008506061791161517, + "loss": 0.2941203713417053, + "loss_ce": 0.00627860426902771, + "loss_iou": 0.380859375, + "loss_num": 0.0576171875, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 8444896, + "step": 87 + }, + { + "epoch": 0.008603832616347283, + "grad_norm": 3.486878295366147, + "learning_rate": 5e-05, + "loss": 0.2255, + "num_input_tokens_seen": 8540960, + "step": 88 + }, + { + "epoch": 0.008603832616347283, + "loss": 0.22416046261787415, + "loss_ce": 0.004861145280301571, + "loss_iou": 0.1328125, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 8540960, + "step": 88 + }, + { + "epoch": 0.008701603441533047, + "grad_norm": 4.960969322978329, + "learning_rate": 5e-05, + "loss": 0.2305, + "num_input_tokens_seen": 8637112, + "step": 89 + }, + { + "epoch": 0.008701603441533047, + "loss": 0.2654375433921814, + "loss_ce": 0.009211954660713673, + "loss_iou": 0.255859375, + "loss_num": 0.05126953125, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 8637112, + "step": 89 + }, + { + "epoch": 0.00879937426671881, + "grad_norm": 4.195609719739853, + "learning_rate": 5e-05, + "loss": 0.2933, + "num_input_tokens_seen": 8733944, + "step": 90 + }, + { + "epoch": 0.00879937426671881, + "loss": 0.22761142253875732, + "loss_ce": 0.0048331101424992085, + "loss_iou": 0.220703125, + "loss_num": 0.044677734375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 8733944, + "step": 90 + }, + { + "epoch": 0.008897145091904576, + "grad_norm": 6.105302383163121, + "learning_rate": 5e-05, + "loss": 0.2761, + "num_input_tokens_seen": 8831160, + "step": 91 + }, + { + "epoch": 0.008897145091904576, + "loss": 0.30207714438438416, + "loss_ce": 0.00813182070851326, + "loss_iou": 0.30859375, + "loss_num": 0.058837890625, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 8831160, + "step": 91 + }, + { + "epoch": 0.00899491591709034, + "grad_norm": 4.642888851345922, + "learning_rate": 5e-05, + "loss": 0.3173, + "num_input_tokens_seen": 8928848, + "step": 92 + }, + { + "epoch": 0.00899491591709034, + "loss": 0.2873644232749939, + "loss_ce": 0.00684683583676815, + "loss_iou": 0.2197265625, + "loss_num": 0.05615234375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 8928848, + "step": 92 + }, + { + "epoch": 0.009092686742276104, + "grad_norm": 4.485490442096059, + "learning_rate": 5e-05, + "loss": 0.2625, + "num_input_tokens_seen": 9025372, + "step": 93 + }, + { + "epoch": 0.009092686742276104, + "loss": 0.24821260571479797, + "loss_ce": 0.0063913180492818356, + "loss_iou": 0.09619140625, + "loss_num": 0.04833984375, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 9025372, + "step": 93 + }, + { + "epoch": 0.00919045756746187, + "grad_norm": 5.677061117336446, + "learning_rate": 5e-05, + "loss": 0.2568, + "num_input_tokens_seen": 9121980, + "step": 94 + }, + { + "epoch": 0.00919045756746187, + "loss": 0.23036891222000122, + "loss_ce": 0.015525158494710922, + "loss_iou": 0.193359375, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 9121980, + "step": 94 + }, + { + "epoch": 0.009288228392647634, + "grad_norm": 7.162144075218135, + "learning_rate": 5e-05, + "loss": 0.2977, + "num_input_tokens_seen": 9219004, + "step": 95 + }, + { + "epoch": 0.009288228392647634, + "loss": 0.323424756526947, + "loss_ce": 0.008300228975713253, + "loss_iou": 0.13671875, + "loss_num": 0.06298828125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 9219004, + "step": 95 + }, + { + "epoch": 0.009385999217833398, + "grad_norm": 9.53373400070288, + "learning_rate": 5e-05, + "loss": 0.2035, + "num_input_tokens_seen": 9316828, + "step": 96 + }, + { + "epoch": 0.009385999217833398, + "loss": 0.1684752255678177, + "loss_ce": 0.009539680555462837, + "loss_iou": 0.287109375, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 9316828, + "step": 96 + }, + { + "epoch": 0.009483770043019164, + "grad_norm": 12.992755019518782, + "learning_rate": 5e-05, + "loss": 0.2875, + "num_input_tokens_seen": 9412784, + "step": 97 + }, + { + "epoch": 0.009483770043019164, + "loss": 0.25802361965179443, + "loss_ce": 0.009000184014439583, + "loss_iou": 0.171875, + "loss_num": 0.0498046875, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 9412784, + "step": 97 + }, + { + "epoch": 0.009581540868204928, + "grad_norm": 4.320861952812101, + "learning_rate": 5e-05, + "loss": 0.2298, + "num_input_tokens_seen": 9510120, + "step": 98 + }, + { + "epoch": 0.009581540868204928, + "loss": 0.2541084289550781, + "loss_ce": 0.005390159785747528, + "loss_iou": 0.2734375, + "loss_num": 0.0498046875, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 9510120, + "step": 98 + }, + { + "epoch": 0.009679311693390692, + "grad_norm": 2.4145548311154013, + "learning_rate": 5e-05, + "loss": 0.1512, + "num_input_tokens_seen": 9606388, + "step": 99 + }, + { + "epoch": 0.009679311693390692, + "loss": 0.1716328263282776, + "loss_ce": 0.005037371069192886, + "loss_iou": 0.359375, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 9606388, + "step": 99 + }, + { + "epoch": 0.009777082518576457, + "grad_norm": 6.183336741231782, + "learning_rate": 5e-05, + "loss": 0.2274, + "num_input_tokens_seen": 9703528, + "step": 100 + }, + { + "epoch": 0.009777082518576457, + "loss": 0.20822414755821228, + "loss_ce": 0.006197785958647728, + "loss_iou": 0.302734375, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 9703528, + "step": 100 + }, + { + "epoch": 0.009874853343762221, + "grad_norm": 4.758201069292097, + "learning_rate": 5e-05, + "loss": 0.2601, + "num_input_tokens_seen": 9800212, + "step": 101 + }, + { + "epoch": 0.009874853343762221, + "loss": 0.23223954439163208, + "loss_ce": 0.007996363565325737, + "loss_iou": 0.2265625, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 9800212, + "step": 101 + }, + { + "epoch": 0.009972624168947985, + "grad_norm": 3.70344606181279, + "learning_rate": 5e-05, + "loss": 0.2683, + "num_input_tokens_seen": 9897948, + "step": 102 + }, + { + "epoch": 0.009972624168947985, + "loss": 0.21028941869735718, + "loss_ce": 0.006004763767123222, + "loss_iou": 0.28125, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 9897948, + "step": 102 + }, + { + "epoch": 0.010070394994133751, + "grad_norm": 4.4960581327809095, + "learning_rate": 5e-05, + "loss": 0.224, + "num_input_tokens_seen": 9994908, + "step": 103 + }, + { + "epoch": 0.010070394994133751, + "loss": 0.18734873831272125, + "loss_ce": 0.005921731702983379, + "loss_iou": 0.28515625, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 9994908, + "step": 103 + }, + { + "epoch": 0.010168165819319515, + "grad_norm": 6.2979354415318385, + "learning_rate": 5e-05, + "loss": 0.3094, + "num_input_tokens_seen": 10091952, + "step": 104 + }, + { + "epoch": 0.010168165819319515, + "loss": 0.22074362635612488, + "loss_ce": 0.0046791620552539825, + "loss_iou": 0.0927734375, + "loss_num": 0.043212890625, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 10091952, + "step": 104 + }, + { + "epoch": 0.010265936644505279, + "grad_norm": 3.614661950094392, + "learning_rate": 5e-05, + "loss": 0.2573, + "num_input_tokens_seen": 10188044, + "step": 105 + }, + { + "epoch": 0.010265936644505279, + "loss": 0.30025190114974976, + "loss_ce": 0.007283124141395092, + "loss_iou": 0.2275390625, + "loss_num": 0.05859375, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 10188044, + "step": 105 + }, + { + "epoch": 0.010363707469691045, + "grad_norm": 15.683606487363871, + "learning_rate": 5e-05, + "loss": 0.2488, + "num_input_tokens_seen": 10285052, + "step": 106 + }, + { + "epoch": 0.010363707469691045, + "loss": 0.23048678040504456, + "loss_ce": 0.008623982779681683, + "loss_iou": 0.2451171875, + "loss_num": 0.04443359375, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 10285052, + "step": 106 + }, + { + "epoch": 0.010461478294876808, + "grad_norm": 4.55603521048472, + "learning_rate": 5e-05, + "loss": 0.2219, + "num_input_tokens_seen": 10382220, + "step": 107 + }, + { + "epoch": 0.010461478294876808, + "loss": 0.2487013041973114, + "loss_ce": 0.0064527736976742744, + "loss_iou": 0.39453125, + "loss_num": 0.048583984375, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 10382220, + "step": 107 + }, + { + "epoch": 0.010559249120062574, + "grad_norm": 9.256896769409382, + "learning_rate": 5e-05, + "loss": 0.2131, + "num_input_tokens_seen": 10478868, + "step": 108 + }, + { + "epoch": 0.010559249120062574, + "loss": 0.19378724694252014, + "loss_ce": 0.004608779214322567, + "loss_iou": 0.4140625, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 10478868, + "step": 108 + }, + { + "epoch": 0.010657019945248338, + "grad_norm": 7.850322743413161, + "learning_rate": 5e-05, + "loss": 0.3083, + "num_input_tokens_seen": 10576140, + "step": 109 + }, + { + "epoch": 0.010657019945248338, + "loss": 0.29823026061058044, + "loss_ce": 0.00343045755289495, + "loss_iou": 0.396484375, + "loss_num": 0.058837890625, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 10576140, + "step": 109 + }, + { + "epoch": 0.010754790770434102, + "grad_norm": 9.831040037157482, + "learning_rate": 5e-05, + "loss": 0.3578, + "num_input_tokens_seen": 10673312, + "step": 110 + }, + { + "epoch": 0.010754790770434102, + "loss": 0.3560173511505127, + "loss_ce": 0.014617210254073143, + "loss_iou": 0.2451171875, + "loss_num": 0.068359375, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 10673312, + "step": 110 + }, + { + "epoch": 0.010852561595619868, + "grad_norm": 26.76291719536847, + "learning_rate": 5e-05, + "loss": 0.6255, + "num_input_tokens_seen": 10770192, + "step": 111 + }, + { + "epoch": 0.010852561595619868, + "loss": 0.5735502243041992, + "loss_ce": 0.37262243032455444, + "loss_iou": 0.15625, + "loss_num": 0.0400390625, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 10770192, + "step": 111 + }, + { + "epoch": 0.010950332420805632, + "grad_norm": 33.355102067548074, + "learning_rate": 5e-05, + "loss": 1.4347, + "num_input_tokens_seen": 10868004, + "step": 112 + }, + { + "epoch": 0.010950332420805632, + "loss": 1.5023423433303833, + "loss_ce": 1.0924302339553833, + "loss_iou": 0.40234375, + "loss_num": 0.08203125, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 10868004, + "step": 112 + }, + { + "epoch": 0.011048103245991396, + "grad_norm": 22.4113366388039, + "learning_rate": 5e-05, + "loss": 1.3737, + "num_input_tokens_seen": 10965368, + "step": 113 + }, + { + "epoch": 0.011048103245991396, + "loss": 1.3776779174804688, + "loss_ce": 0.9130784273147583, + "loss_iou": 0.703125, + "loss_num": 0.0927734375, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 10965368, + "step": 113 + }, + { + "epoch": 0.011145874071177161, + "grad_norm": 13.055455434549502, + "learning_rate": 5e-05, + "loss": 0.6315, + "num_input_tokens_seen": 11063304, + "step": 114 + }, + { + "epoch": 0.011145874071177161, + "loss": 0.6048953533172607, + "loss_ce": 0.29984164237976074, + "loss_iou": 0.26953125, + "loss_num": 0.06103515625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 11063304, + "step": 114 + }, + { + "epoch": 0.011243644896362925, + "grad_norm": 14.69106716761115, + "learning_rate": 5e-05, + "loss": 0.3208, + "num_input_tokens_seen": 11159588, + "step": 115 + }, + { + "epoch": 0.011243644896362925, + "loss": 0.31732818484306335, + "loss_ce": 0.15869778394699097, + "loss_iou": 0.341796875, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 11159588, + "step": 115 + }, + { + "epoch": 0.01134141572154869, + "grad_norm": 16.293497365948337, + "learning_rate": 5e-05, + "loss": 0.5619, + "num_input_tokens_seen": 11255760, + "step": 116 + }, + { + "epoch": 0.01134141572154869, + "loss": 0.5518399477005005, + "loss_ce": 0.3346768617630005, + "loss_iou": 0.34765625, + "loss_num": 0.04345703125, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 11255760, + "step": 116 + }, + { + "epoch": 0.011439186546734455, + "grad_norm": 20.381265804183702, + "learning_rate": 5e-05, + "loss": 0.5387, + "num_input_tokens_seen": 11353228, + "step": 117 + }, + { + "epoch": 0.011439186546734455, + "loss": 0.5420895218849182, + "loss_ce": 0.2203122079372406, + "loss_iou": 0.21484375, + "loss_num": 0.064453125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 11353228, + "step": 117 + }, + { + "epoch": 0.011536957371920219, + "grad_norm": 7.8669061341389765, + "learning_rate": 5e-05, + "loss": 0.4889, + "num_input_tokens_seen": 11450568, + "step": 118 + }, + { + "epoch": 0.011536957371920219, + "loss": 0.4992724061012268, + "loss_ce": 0.1306200921535492, + "loss_iou": 0.1962890625, + "loss_num": 0.07373046875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 11450568, + "step": 118 + }, + { + "epoch": 0.011634728197105983, + "grad_norm": 6.8691216131725215, + "learning_rate": 5e-05, + "loss": 0.3489, + "num_input_tokens_seen": 11547464, + "step": 119 + }, + { + "epoch": 0.011634728197105983, + "loss": 0.2912801504135132, + "loss_ce": 0.04274498298764229, + "loss_iou": 0.302734375, + "loss_num": 0.0498046875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 11547464, + "step": 119 + }, + { + "epoch": 0.011732499022291749, + "grad_norm": 4.252465848211654, + "learning_rate": 5e-05, + "loss": 0.255, + "num_input_tokens_seen": 11644804, + "step": 120 + }, + { + "epoch": 0.011732499022291749, + "loss": 0.29378244280815125, + "loss_ce": 0.013020719401538372, + "loss_iou": 0.2421875, + "loss_num": 0.05615234375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 11644804, + "step": 120 + }, + { + "epoch": 0.011830269847477513, + "grad_norm": 3.7257795940138587, + "learning_rate": 5e-05, + "loss": 0.2459, + "num_input_tokens_seen": 11741504, + "step": 121 + }, + { + "epoch": 0.011830269847477513, + "loss": 0.27493521571159363, + "loss_ce": 0.010988680645823479, + "loss_iou": 0.169921875, + "loss_num": 0.052734375, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 11741504, + "step": 121 + }, + { + "epoch": 0.011928040672663277, + "grad_norm": 3.242359401187384, + "learning_rate": 5e-05, + "loss": 0.2346, + "num_input_tokens_seen": 11838504, + "step": 122 + }, + { + "epoch": 0.011928040672663277, + "loss": 0.2652418613433838, + "loss_ce": 0.01139664463698864, + "loss_iou": 0.3125, + "loss_num": 0.05078125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 11838504, + "step": 122 + }, + { + "epoch": 0.012025811497849042, + "grad_norm": 3.413353843653016, + "learning_rate": 5e-05, + "loss": 0.1907, + "num_input_tokens_seen": 11935472, + "step": 123 + }, + { + "epoch": 0.012025811497849042, + "loss": 0.19107796251773834, + "loss_ce": 0.007972488179802895, + "loss_iou": 0.19140625, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 11935472, + "step": 123 + }, + { + "epoch": 0.012123582323034806, + "grad_norm": 5.716984859146709, + "learning_rate": 5e-05, + "loss": 0.2531, + "num_input_tokens_seen": 12032900, + "step": 124 + }, + { + "epoch": 0.012123582323034806, + "loss": 0.2570669651031494, + "loss_ce": 0.012438078410923481, + "loss_iou": 0.298828125, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 12032900, + "step": 124 + }, + { + "epoch": 0.01222135314822057, + "grad_norm": 3.968293675030658, + "learning_rate": 5e-05, + "loss": 0.2471, + "num_input_tokens_seen": 12129464, + "step": 125 + }, + { + "epoch": 0.01222135314822057, + "loss": 0.269656240940094, + "loss_ce": 0.011965811252593994, + "loss_iou": 0.193359375, + "loss_num": 0.051513671875, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 12129464, + "step": 125 + }, + { + "epoch": 0.012319123973406336, + "grad_norm": 65.97825698048318, + "learning_rate": 5e-05, + "loss": 0.2578, + "num_input_tokens_seen": 12226440, + "step": 126 + }, + { + "epoch": 0.012319123973406336, + "loss": 0.24614185094833374, + "loss_ce": 0.007372324820607901, + "loss_iou": 0.3046875, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 12226440, + "step": 126 + }, + { + "epoch": 0.0124168947985921, + "grad_norm": 6.959367825714183, + "learning_rate": 5e-05, + "loss": 0.1903, + "num_input_tokens_seen": 12323796, + "step": 127 + }, + { + "epoch": 0.0124168947985921, + "loss": 0.21953894197940826, + "loss_ce": 0.004512084648013115, + "loss_iou": 0.42578125, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 12323796, + "step": 127 + }, + { + "epoch": 0.012514665623777864, + "grad_norm": 5.195751563818454, + "learning_rate": 5e-05, + "loss": 0.2155, + "num_input_tokens_seen": 12418456, + "step": 128 + }, + { + "epoch": 0.012514665623777864, + "loss": 0.22596561908721924, + "loss_ce": 0.012037405744194984, + "loss_iou": 0.259765625, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 12418456, + "step": 128 + }, + { + "epoch": 0.01261243644896363, + "grad_norm": 5.776983983050901, + "learning_rate": 5e-05, + "loss": 0.2574, + "num_input_tokens_seen": 12515688, + "step": 129 + }, + { + "epoch": 0.01261243644896363, + "loss": 0.22908541560173035, + "loss_ce": 0.01137302815914154, + "loss_iou": 0.376953125, + "loss_num": 0.04345703125, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 12515688, + "step": 129 + }, + { + "epoch": 0.012710207274149394, + "grad_norm": 5.069074361615525, + "learning_rate": 5e-05, + "loss": 0.2759, + "num_input_tokens_seen": 12612928, + "step": 130 + }, + { + "epoch": 0.012710207274149394, + "loss": 0.253068208694458, + "loss_ce": 0.012223497033119202, + "loss_iou": 0.1162109375, + "loss_num": 0.048095703125, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 12612928, + "step": 130 + }, + { + "epoch": 0.012807978099335158, + "grad_norm": 4.750340335368411, + "learning_rate": 5e-05, + "loss": 0.1762, + "num_input_tokens_seen": 12709344, + "step": 131 + }, + { + "epoch": 0.012807978099335158, + "loss": 0.18803320825099945, + "loss_ce": 0.008437261916697025, + "loss_iou": 0.3046875, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 12709344, + "step": 131 + }, + { + "epoch": 0.012905748924520923, + "grad_norm": 3.8378077070170793, + "learning_rate": 5e-05, + "loss": 0.2607, + "num_input_tokens_seen": 12806508, + "step": 132 + }, + { + "epoch": 0.012905748924520923, + "loss": 0.2723312973976135, + "loss_ce": 0.011772220954298973, + "loss_iou": 0.205078125, + "loss_num": 0.05224609375, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 12806508, + "step": 132 + }, + { + "epoch": 0.013003519749706687, + "grad_norm": 4.554570812503231, + "learning_rate": 5e-05, + "loss": 0.2205, + "num_input_tokens_seen": 12903352, + "step": 133 + }, + { + "epoch": 0.013003519749706687, + "loss": 0.2394300103187561, + "loss_ce": 0.01115850917994976, + "loss_iou": 0.4375, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 12903352, + "step": 133 + }, + { + "epoch": 0.013101290574892453, + "grad_norm": 6.906874786943017, + "learning_rate": 5e-05, + "loss": 0.3269, + "num_input_tokens_seen": 13000448, + "step": 134 + }, + { + "epoch": 0.013101290574892453, + "loss": 0.2818675637245178, + "loss_ce": 0.014045299962162971, + "loss_iou": 0.1611328125, + "loss_num": 0.0537109375, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 13000448, + "step": 134 + }, + { + "epoch": 0.013199061400078217, + "grad_norm": 6.492629824172584, + "learning_rate": 5e-05, + "loss": 0.2552, + "num_input_tokens_seen": 13097420, + "step": 135 + }, + { + "epoch": 0.013199061400078217, + "loss": 0.20966781675815582, + "loss_ce": 0.00550522655248642, + "loss_iou": 0.228515625, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 13097420, + "step": 135 + }, + { + "epoch": 0.01329683222526398, + "grad_norm": 3.788728434004895, + "learning_rate": 5e-05, + "loss": 0.2556, + "num_input_tokens_seen": 13193756, + "step": 136 + }, + { + "epoch": 0.01329683222526398, + "loss": 0.24739064276218414, + "loss_ce": 0.008560064248740673, + "loss_iou": 0.357421875, + "loss_num": 0.0478515625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 13193756, + "step": 136 + }, + { + "epoch": 0.013394603050449747, + "grad_norm": 3.9136134934373166, + "learning_rate": 5e-05, + "loss": 0.1977, + "num_input_tokens_seen": 13291300, + "step": 137 + }, + { + "epoch": 0.013394603050449747, + "loss": 0.2240983545780182, + "loss_ce": 0.009987056255340576, + "loss_iou": 0.251953125, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 13291300, + "step": 137 + }, + { + "epoch": 0.01349237387563551, + "grad_norm": 3.0739776646181496, + "learning_rate": 5e-05, + "loss": 0.1942, + "num_input_tokens_seen": 13387076, + "step": 138 + }, + { + "epoch": 0.01349237387563551, + "loss": 0.21401433646678925, + "loss_ce": 0.012323660776019096, + "loss_iou": 0.2119140625, + "loss_num": 0.040283203125, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 13387076, + "step": 138 + }, + { + "epoch": 0.013590144700821274, + "grad_norm": 3.9432145081120056, + "learning_rate": 5e-05, + "loss": 0.1911, + "num_input_tokens_seen": 13484668, + "step": 139 + }, + { + "epoch": 0.013590144700821274, + "loss": 0.2604469060897827, + "loss_ce": 0.01075209304690361, + "loss_iou": 0.33984375, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 13484668, + "step": 139 + }, + { + "epoch": 0.01368791552600704, + "grad_norm": 3.6099918216737095, + "learning_rate": 5e-05, + "loss": 0.1702, + "num_input_tokens_seen": 13580436, + "step": 140 + }, + { + "epoch": 0.01368791552600704, + "loss": 0.17385022342205048, + "loss_ce": 0.01247327495366335, + "loss_iou": 0.302734375, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 13580436, + "step": 140 + }, + { + "epoch": 0.013785686351192804, + "grad_norm": 2.6227026355563496, + "learning_rate": 5e-05, + "loss": 0.1282, + "num_input_tokens_seen": 13676152, + "step": 141 + }, + { + "epoch": 0.013785686351192804, + "loss": 0.16737376153469086, + "loss_ce": 0.00962840300053358, + "loss_iou": 0.146484375, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 13676152, + "step": 141 + }, + { + "epoch": 0.013883457176378568, + "grad_norm": 3.952928106901471, + "learning_rate": 5e-05, + "loss": 0.2287, + "num_input_tokens_seen": 13773416, + "step": 142 + }, + { + "epoch": 0.013883457176378568, + "loss": 0.23832044005393982, + "loss_ce": 0.008278928697109222, + "loss_iou": 0.353515625, + "loss_num": 0.0458984375, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 13773416, + "step": 142 + }, + { + "epoch": 0.013981228001564334, + "grad_norm": 7.8584371301362514, + "learning_rate": 5e-05, + "loss": 0.1862, + "num_input_tokens_seen": 13870124, + "step": 143 + }, + { + "epoch": 0.013981228001564334, + "loss": 0.1977016031742096, + "loss_ce": 0.006997269578278065, + "loss_iou": 0.244140625, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 13870124, + "step": 143 + }, + { + "epoch": 0.014078998826750098, + "grad_norm": 5.335608782219375, + "learning_rate": 5e-05, + "loss": 0.3333, + "num_input_tokens_seen": 13967064, + "step": 144 + }, + { + "epoch": 0.014078998826750098, + "loss": 0.3780639171600342, + "loss_ce": 0.005749461241066456, + "loss_iou": 0.63671875, + "loss_num": 0.07421875, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 13967064, + "step": 144 + }, + { + "epoch": 0.014176769651935862, + "grad_norm": 6.003769725742962, + "learning_rate": 5e-05, + "loss": 0.327, + "num_input_tokens_seen": 14063132, + "step": 145 + }, + { + "epoch": 0.014176769651935862, + "loss": 0.41254234313964844, + "loss_ce": 0.009710311889648438, + "loss_iou": 0.23046875, + "loss_num": 0.08056640625, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 14063132, + "step": 145 + }, + { + "epoch": 0.014274540477121627, + "grad_norm": 4.765649059395356, + "learning_rate": 5e-05, + "loss": 0.2501, + "num_input_tokens_seen": 14159640, + "step": 146 + }, + { + "epoch": 0.014274540477121627, + "loss": 0.27320703864097595, + "loss_ce": 0.016859401017427444, + "loss_iou": 0.14453125, + "loss_num": 0.05126953125, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 14159640, + "step": 146 + }, + { + "epoch": 0.014372311302307391, + "grad_norm": 5.889510859816225, + "learning_rate": 5e-05, + "loss": 0.1884, + "num_input_tokens_seen": 14257500, + "step": 147 + }, + { + "epoch": 0.014372311302307391, + "loss": 0.13382816314697266, + "loss_ce": 0.007546433247625828, + "loss_iou": 0.34765625, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 14257500, + "step": 147 + }, + { + "epoch": 0.014470082127493155, + "grad_norm": 3.4268935762984842, + "learning_rate": 5e-05, + "loss": 0.2471, + "num_input_tokens_seen": 14355028, + "step": 148 + }, + { + "epoch": 0.014470082127493155, + "loss": 0.21321283280849457, + "loss_ce": 0.007768499664962292, + "loss_iou": 0.212890625, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 14355028, + "step": 148 + }, + { + "epoch": 0.014567852952678921, + "grad_norm": 5.192243217381898, + "learning_rate": 5e-05, + "loss": 0.2858, + "num_input_tokens_seen": 14452488, + "step": 149 + }, + { + "epoch": 0.014567852952678921, + "loss": 0.31818515062332153, + "loss_ce": 0.007882438600063324, + "loss_iou": 0.2275390625, + "loss_num": 0.06201171875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 14452488, + "step": 149 + }, + { + "epoch": 0.014665623777864685, + "grad_norm": 6.790361673508256, + "learning_rate": 5e-05, + "loss": 0.2488, + "num_input_tokens_seen": 14549076, + "step": 150 + }, + { + "epoch": 0.014665623777864685, + "loss": 0.2456006556749344, + "loss_ce": 0.006586991250514984, + "loss_iou": 0.244140625, + "loss_num": 0.0478515625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 14549076, + "step": 150 + }, + { + "epoch": 0.014763394603050449, + "grad_norm": 4.1774787270426685, + "learning_rate": 5e-05, + "loss": 0.1379, + "num_input_tokens_seen": 14644776, + "step": 151 + }, + { + "epoch": 0.014763394603050449, + "loss": 0.13555729389190674, + "loss_ce": 0.013090256601572037, + "loss_iou": 0.3671875, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 14644776, + "step": 151 + }, + { + "epoch": 0.014861165428236215, + "grad_norm": 3.6312664337768643, + "learning_rate": 5e-05, + "loss": 0.2208, + "num_input_tokens_seen": 14742344, + "step": 152 + }, + { + "epoch": 0.014861165428236215, + "loss": 0.16522523760795593, + "loss_ce": 0.002810694044455886, + "loss_iou": 0.220703125, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 14742344, + "step": 152 + }, + { + "epoch": 0.014958936253421979, + "grad_norm": 11.715349723012862, + "learning_rate": 5e-05, + "loss": 0.1902, + "num_input_tokens_seen": 14838716, + "step": 153 + }, + { + "epoch": 0.014958936253421979, + "loss": 0.193581685423851, + "loss_ce": 0.009560687467455864, + "loss_iou": 0.2421875, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 14838716, + "step": 153 + }, + { + "epoch": 0.015056707078607743, + "grad_norm": 6.617880178490444, + "learning_rate": 5e-05, + "loss": 0.2476, + "num_input_tokens_seen": 14935172, + "step": 154 + }, + { + "epoch": 0.015056707078607743, + "loss": 0.2454570233821869, + "loss_ce": 0.017551738768815994, + "loss_iou": 0.29296875, + "loss_num": 0.045654296875, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 14935172, + "step": 154 + }, + { + "epoch": 0.015154477903793508, + "grad_norm": 6.361877015939392, + "learning_rate": 5e-05, + "loss": 0.255, + "num_input_tokens_seen": 15031548, + "step": 155 + }, + { + "epoch": 0.015154477903793508, + "loss": 0.2781255841255188, + "loss_ce": 0.009692991152405739, + "loss_iou": 0.27734375, + "loss_num": 0.0537109375, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 15031548, + "step": 155 + }, + { + "epoch": 0.015252248728979272, + "grad_norm": 2.8433052884247183, + "learning_rate": 5e-05, + "loss": 0.1749, + "num_input_tokens_seen": 15128736, + "step": 156 + }, + { + "epoch": 0.015252248728979272, + "loss": 0.1680954396724701, + "loss_ce": 0.005436745472252369, + "loss_iou": 0.22265625, + "loss_num": 0.032470703125, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 15128736, + "step": 156 + }, + { + "epoch": 0.015350019554165038, + "grad_norm": 2.670280180050439, + "learning_rate": 5e-05, + "loss": 0.2558, + "num_input_tokens_seen": 15226316, + "step": 157 + }, + { + "epoch": 0.015350019554165038, + "loss": 0.2384186089038849, + "loss_ce": 0.009048495441675186, + "loss_iou": 0.431640625, + "loss_num": 0.0458984375, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 15226316, + "step": 157 + }, + { + "epoch": 0.015447790379350802, + "grad_norm": 4.4702505607611105, + "learning_rate": 5e-05, + "loss": 0.2285, + "num_input_tokens_seen": 15323552, + "step": 158 + }, + { + "epoch": 0.015447790379350802, + "loss": 0.2315988540649414, + "loss_ce": 0.007142066024243832, + "loss_iou": 0.43359375, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 15323552, + "step": 158 + }, + { + "epoch": 0.015545561204536566, + "grad_norm": 3.7939881413668917, + "learning_rate": 5e-05, + "loss": 0.1792, + "num_input_tokens_seen": 15419944, + "step": 159 + }, + { + "epoch": 0.015545561204536566, + "loss": 0.18474744260311127, + "loss_ce": 0.007226691115647554, + "loss_iou": 0.15625, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 15419944, + "step": 159 + }, + { + "epoch": 0.01564333202972233, + "grad_norm": 3.425772459817927, + "learning_rate": 5e-05, + "loss": 0.1748, + "num_input_tokens_seen": 15516852, + "step": 160 + }, + { + "epoch": 0.01564333202972233, + "loss": 0.1437770128250122, + "loss_ce": 0.00425063818693161, + "loss_iou": 0.328125, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 15516852, + "step": 160 + }, + { + "epoch": 0.015741102854908096, + "grad_norm": 8.325884240193455, + "learning_rate": 5e-05, + "loss": 0.1907, + "num_input_tokens_seen": 15613044, + "step": 161 + }, + { + "epoch": 0.015741102854908096, + "loss": 0.12323015928268433, + "loss_ce": 0.005035583861172199, + "loss_iou": 0.2734375, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 15613044, + "step": 161 + }, + { + "epoch": 0.01583887368009386, + "grad_norm": 5.13189079811308, + "learning_rate": 5e-05, + "loss": 0.2279, + "num_input_tokens_seen": 15709876, + "step": 162 + }, + { + "epoch": 0.01583887368009386, + "loss": 0.2876690924167633, + "loss_ce": 0.009837075136601925, + "loss_iou": 0.279296875, + "loss_num": 0.0556640625, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 15709876, + "step": 162 + }, + { + "epoch": 0.015936644505279624, + "grad_norm": 6.004651160820632, + "learning_rate": 5e-05, + "loss": 0.2032, + "num_input_tokens_seen": 15807532, + "step": 163 + }, + { + "epoch": 0.015936644505279624, + "loss": 0.2009119838476181, + "loss_ce": 0.003158074803650379, + "loss_iou": 0.47265625, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 15807532, + "step": 163 + }, + { + "epoch": 0.016034415330465387, + "grad_norm": 4.4547635368032585, + "learning_rate": 5e-05, + "loss": 0.2185, + "num_input_tokens_seen": 15904944, + "step": 164 + }, + { + "epoch": 0.016034415330465387, + "loss": 0.17350733280181885, + "loss_ce": 0.0025478703901171684, + "loss_iou": 0.30859375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 15904944, + "step": 164 + }, + { + "epoch": 0.016132186155651155, + "grad_norm": 2.636115695570698, + "learning_rate": 5e-05, + "loss": 0.1806, + "num_input_tokens_seen": 16002160, + "step": 165 + }, + { + "epoch": 0.016132186155651155, + "loss": 0.18415787816047668, + "loss_ce": 0.00953628495335579, + "loss_iou": 0.318359375, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 16002160, + "step": 165 + }, + { + "epoch": 0.01622995698083692, + "grad_norm": 4.123844435969535, + "learning_rate": 5e-05, + "loss": 0.2078, + "num_input_tokens_seen": 16099316, + "step": 166 + }, + { + "epoch": 0.01622995698083692, + "loss": 0.18492092192173004, + "loss_ce": 0.008956566452980042, + "loss_iou": 0.404296875, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 16099316, + "step": 166 + }, + { + "epoch": 0.016327727806022683, + "grad_norm": 3.4854223581261237, + "learning_rate": 5e-05, + "loss": 0.168, + "num_input_tokens_seen": 16196520, + "step": 167 + }, + { + "epoch": 0.016327727806022683, + "loss": 0.17234443128108978, + "loss_ce": 0.008159858174622059, + "loss_iou": 0.470703125, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 16196520, + "step": 167 + }, + { + "epoch": 0.016425498631208447, + "grad_norm": 4.920222281676913, + "learning_rate": 5e-05, + "loss": 0.2929, + "num_input_tokens_seen": 16293164, + "step": 168 + }, + { + "epoch": 0.016425498631208447, + "loss": 0.350635826587677, + "loss_ce": 0.012928299605846405, + "loss_iou": 0.263671875, + "loss_num": 0.0673828125, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 16293164, + "step": 168 + }, + { + "epoch": 0.01652326945639421, + "grad_norm": 5.178713399403613, + "learning_rate": 5e-05, + "loss": 0.1616, + "num_input_tokens_seen": 16389728, + "step": 169 + }, + { + "epoch": 0.01652326945639421, + "loss": 0.18247342109680176, + "loss_ce": 0.011254554614424706, + "loss_iou": 0.2177734375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 16389728, + "step": 169 + }, + { + "epoch": 0.016621040281579978, + "grad_norm": 10.893808881406818, + "learning_rate": 5e-05, + "loss": 0.3216, + "num_input_tokens_seen": 16486528, + "step": 170 + }, + { + "epoch": 0.016621040281579978, + "loss": 0.2885724902153015, + "loss_ce": 0.00475900387391448, + "loss_iou": 0.51171875, + "loss_num": 0.056640625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 16486528, + "step": 170 + }, + { + "epoch": 0.016718811106765742, + "grad_norm": 5.991508545140038, + "learning_rate": 5e-05, + "loss": 0.2232, + "num_input_tokens_seen": 16583588, + "step": 171 + }, + { + "epoch": 0.016718811106765742, + "loss": 0.2474888563156128, + "loss_ce": 0.006888265255838633, + "loss_iou": 0.326171875, + "loss_num": 0.048095703125, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 16583588, + "step": 171 + }, + { + "epoch": 0.016816581931951506, + "grad_norm": 2.7228565523745645, + "learning_rate": 5e-05, + "loss": 0.1818, + "num_input_tokens_seen": 16679720, + "step": 172 + }, + { + "epoch": 0.016816581931951506, + "loss": 0.15624107420444489, + "loss_ce": 0.011526713147759438, + "loss_iou": 0.28125, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 16679720, + "step": 172 + }, + { + "epoch": 0.01691435275713727, + "grad_norm": 2.2553927430075844, + "learning_rate": 5e-05, + "loss": 0.1371, + "num_input_tokens_seen": 16776552, + "step": 173 + }, + { + "epoch": 0.01691435275713727, + "loss": 0.13777180016040802, + "loss_ce": 0.01101703941822052, + "loss_iou": 0.279296875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 16776552, + "step": 173 + }, + { + "epoch": 0.017012123582323034, + "grad_norm": 4.069832084110328, + "learning_rate": 5e-05, + "loss": 0.1425, + "num_input_tokens_seen": 16873360, + "step": 174 + }, + { + "epoch": 0.017012123582323034, + "loss": 0.13716575503349304, + "loss_ce": 0.004994123242795467, + "loss_iou": 0.30078125, + "loss_num": 0.0264892578125, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 16873360, + "step": 174 + }, + { + "epoch": 0.017109894407508798, + "grad_norm": 4.540943274220243, + "learning_rate": 5e-05, + "loss": 0.3601, + "num_input_tokens_seen": 16970456, + "step": 175 + }, + { + "epoch": 0.017109894407508798, + "loss": 0.37488245964050293, + "loss_ce": 0.004154950845986605, + "loss_iou": 0.53515625, + "loss_num": 0.07421875, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 16970456, + "step": 175 + }, + { + "epoch": 0.017207665232694565, + "grad_norm": 4.204424063862682, + "learning_rate": 5e-05, + "loss": 0.2046, + "num_input_tokens_seen": 17066976, + "step": 176 + }, + { + "epoch": 0.017207665232694565, + "loss": 0.22976934909820557, + "loss_ce": 0.012789357453584671, + "loss_iou": 0.2255859375, + "loss_num": 0.04345703125, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 17066976, + "step": 176 + }, + { + "epoch": 0.01730543605788033, + "grad_norm": 2.8682478271389047, + "learning_rate": 5e-05, + "loss": 0.1073, + "num_input_tokens_seen": 17163808, + "step": 177 + }, + { + "epoch": 0.01730543605788033, + "loss": 0.11043824255466461, + "loss_ce": 0.0037487922236323357, + "loss_iou": 0.37109375, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 17163808, + "step": 177 + }, + { + "epoch": 0.017403206883066093, + "grad_norm": 5.831563262510318, + "learning_rate": 5e-05, + "loss": 0.2909, + "num_input_tokens_seen": 17260484, + "step": 178 + }, + { + "epoch": 0.017403206883066093, + "loss": 0.3247200548648834, + "loss_ce": 0.0058113643899559975, + "loss_iou": 0.291015625, + "loss_num": 0.06396484375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 17260484, + "step": 178 + }, + { + "epoch": 0.017500977708251857, + "grad_norm": 4.180162309610195, + "learning_rate": 5e-05, + "loss": 0.1649, + "num_input_tokens_seen": 17358760, + "step": 179 + }, + { + "epoch": 0.017500977708251857, + "loss": 0.14065033197402954, + "loss_ce": 0.006022042129188776, + "loss_iou": 0.3046875, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 17358760, + "step": 179 + }, + { + "epoch": 0.01759874853343762, + "grad_norm": 7.746516467892613, + "learning_rate": 5e-05, + "loss": 0.1872, + "num_input_tokens_seen": 17455992, + "step": 180 + }, + { + "epoch": 0.01759874853343762, + "loss": 0.2534911632537842, + "loss_ce": 0.0038573727943003178, + "loss_iou": 0.2431640625, + "loss_num": 0.0498046875, + "loss_xval": 0.25, + "num_input_tokens_seen": 17455992, + "step": 180 + }, + { + "epoch": 0.017696519358623385, + "grad_norm": 4.07867245956029, + "learning_rate": 5e-05, + "loss": 0.2092, + "num_input_tokens_seen": 17551820, + "step": 181 + }, + { + "epoch": 0.017696519358623385, + "loss": 0.18453723192214966, + "loss_ce": 0.006314574740827084, + "loss_iou": 0.39453125, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 17551820, + "step": 181 + }, + { + "epoch": 0.017794290183809153, + "grad_norm": 4.052600639808195, + "learning_rate": 5e-05, + "loss": 0.171, + "num_input_tokens_seen": 17649988, + "step": 182 + }, + { + "epoch": 0.017794290183809153, + "loss": 0.13870930671691895, + "loss_ce": 0.005713696125894785, + "loss_iou": 0.361328125, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 17649988, + "step": 182 + }, + { + "epoch": 0.017892061008994917, + "grad_norm": 6.347618046163547, + "learning_rate": 5e-05, + "loss": 0.1739, + "num_input_tokens_seen": 17748056, + "step": 183 + }, + { + "epoch": 0.017892061008994917, + "loss": 0.16482040286064148, + "loss_ce": 0.004420003388077021, + "loss_iou": 0.359375, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 17748056, + "step": 183 + }, + { + "epoch": 0.01798983183418068, + "grad_norm": 5.534380451802281, + "learning_rate": 5e-05, + "loss": 0.2319, + "num_input_tokens_seen": 17845320, + "step": 184 + }, + { + "epoch": 0.01798983183418068, + "loss": 0.234937384724617, + "loss_ce": 0.0081307552754879, + "loss_iou": 0.47265625, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 17845320, + "step": 184 + }, + { + "epoch": 0.018087602659366445, + "grad_norm": 3.857486997407134, + "learning_rate": 5e-05, + "loss": 0.224, + "num_input_tokens_seen": 17942744, + "step": 185 + }, + { + "epoch": 0.018087602659366445, + "loss": 0.21774786710739136, + "loss_ce": 0.010167299769818783, + "loss_iou": 0.310546875, + "loss_num": 0.04150390625, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 17942744, + "step": 185 + }, + { + "epoch": 0.01818537348455221, + "grad_norm": 4.047115208017783, + "learning_rate": 5e-05, + "loss": 0.2033, + "num_input_tokens_seen": 18038364, + "step": 186 + }, + { + "epoch": 0.01818537348455221, + "loss": 0.19825530052185059, + "loss_ce": 0.013135677203536034, + "loss_iou": 0.240234375, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 18038364, + "step": 186 + }, + { + "epoch": 0.018283144309737973, + "grad_norm": 6.849605953376123, + "learning_rate": 5e-05, + "loss": 0.169, + "num_input_tokens_seen": 18134916, + "step": 187 + }, + { + "epoch": 0.018283144309737973, + "loss": 0.23100152611732483, + "loss_ce": 0.001906073186546564, + "loss_iou": 0.1689453125, + "loss_num": 0.0458984375, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 18134916, + "step": 187 + }, + { + "epoch": 0.01838091513492374, + "grad_norm": 6.776897084068749, + "learning_rate": 5e-05, + "loss": 0.2022, + "num_input_tokens_seen": 18231720, + "step": 188 + }, + { + "epoch": 0.01838091513492374, + "loss": 0.15283513069152832, + "loss_ce": 0.007998712360858917, + "loss_iou": 0.298828125, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 18231720, + "step": 188 + }, + { + "epoch": 0.018478685960109504, + "grad_norm": 4.8051110980397995, + "learning_rate": 5e-05, + "loss": 0.1849, + "num_input_tokens_seen": 18328780, + "step": 189 + }, + { + "epoch": 0.018478685960109504, + "loss": 0.21661853790283203, + "loss_ce": 0.010319722816348076, + "loss_iou": 0.404296875, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 18328780, + "step": 189 + }, + { + "epoch": 0.018576456785295268, + "grad_norm": 2.4805787191074966, + "learning_rate": 5e-05, + "loss": 0.1461, + "num_input_tokens_seen": 18426528, + "step": 190 + }, + { + "epoch": 0.018576456785295268, + "loss": 0.15687395632266998, + "loss_ce": 0.017072929069399834, + "loss_iou": 0.4140625, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 18426528, + "step": 190 + }, + { + "epoch": 0.018674227610481032, + "grad_norm": 4.392834620638861, + "learning_rate": 5e-05, + "loss": 0.1816, + "num_input_tokens_seen": 18524080, + "step": 191 + }, + { + "epoch": 0.018674227610481032, + "loss": 0.1512446403503418, + "loss_ce": 0.004516131244599819, + "loss_iou": 0.515625, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 18524080, + "step": 191 + }, + { + "epoch": 0.018771998435666796, + "grad_norm": 5.654688811090168, + "learning_rate": 5e-05, + "loss": 0.2604, + "num_input_tokens_seen": 18620840, + "step": 192 + }, + { + "epoch": 0.018771998435666796, + "loss": 0.23847824335098267, + "loss_ce": 0.004042210057377815, + "loss_iou": 0.265625, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 18620840, + "step": 192 + }, + { + "epoch": 0.018869769260852563, + "grad_norm": 4.010497945684997, + "learning_rate": 5e-05, + "loss": 0.2057, + "num_input_tokens_seen": 18717436, + "step": 193 + }, + { + "epoch": 0.018869769260852563, + "loss": 0.19534267485141754, + "loss_ce": 0.00875820778310299, + "loss_iou": 0.330078125, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 18717436, + "step": 193 + }, + { + "epoch": 0.018967540086038327, + "grad_norm": 2.6294774823268274, + "learning_rate": 5e-05, + "loss": 0.1441, + "num_input_tokens_seen": 18813600, + "step": 194 + }, + { + "epoch": 0.018967540086038327, + "loss": 0.18927106261253357, + "loss_ce": 0.009461482986807823, + "loss_iou": 0.275390625, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 18813600, + "step": 194 + }, + { + "epoch": 0.01906531091122409, + "grad_norm": 3.936537066960987, + "learning_rate": 5e-05, + "loss": 0.1775, + "num_input_tokens_seen": 18910520, + "step": 195 + }, + { + "epoch": 0.01906531091122409, + "loss": 0.19100917875766754, + "loss_ce": 0.004302641376852989, + "loss_iou": 0.376953125, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 18910520, + "step": 195 + }, + { + "epoch": 0.019163081736409855, + "grad_norm": 4.437186544598141, + "learning_rate": 5e-05, + "loss": 0.2609, + "num_input_tokens_seen": 19007496, + "step": 196 + }, + { + "epoch": 0.019163081736409855, + "loss": 0.23496603965759277, + "loss_ce": 0.0060231732204556465, + "loss_iou": 0.447265625, + "loss_num": 0.0458984375, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 19007496, + "step": 196 + }, + { + "epoch": 0.01926085256159562, + "grad_norm": 3.4151823875773197, + "learning_rate": 5e-05, + "loss": 0.2156, + "num_input_tokens_seen": 19105252, + "step": 197 + }, + { + "epoch": 0.01926085256159562, + "loss": 0.1836908459663391, + "loss_ce": 0.005163006484508514, + "loss_iou": 0.275390625, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 19105252, + "step": 197 + }, + { + "epoch": 0.019358623386781383, + "grad_norm": 3.505773498721135, + "learning_rate": 5e-05, + "loss": 0.2047, + "num_input_tokens_seen": 19202460, + "step": 198 + }, + { + "epoch": 0.019358623386781383, + "loss": 0.17738567292690277, + "loss_ce": 0.00770794041454792, + "loss_iou": 0.232421875, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 19202460, + "step": 198 + }, + { + "epoch": 0.01945639421196715, + "grad_norm": 3.325809440586205, + "learning_rate": 5e-05, + "loss": 0.1764, + "num_input_tokens_seen": 19299328, + "step": 199 + }, + { + "epoch": 0.01945639421196715, + "loss": 0.1906391680240631, + "loss_ce": 0.012050291523337364, + "loss_iou": 0.318359375, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 19299328, + "step": 199 + }, + { + "epoch": 0.019554165037152915, + "grad_norm": 3.905494983675359, + "learning_rate": 5e-05, + "loss": 0.1543, + "num_input_tokens_seen": 19396128, + "step": 200 + }, + { + "epoch": 0.019554165037152915, + "loss": 0.20669016242027283, + "loss_ce": 0.006769509986042976, + "loss_iou": 0.4453125, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 19396128, + "step": 200 + }, + { + "epoch": 0.01965193586233868, + "grad_norm": 5.494490665500868, + "learning_rate": 5e-05, + "loss": 0.2879, + "num_input_tokens_seen": 19492836, + "step": 201 + }, + { + "epoch": 0.01965193586233868, + "loss": 0.30611419677734375, + "loss_ce": 0.010826095007359982, + "loss_iou": 0.474609375, + "loss_num": 0.058837890625, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 19492836, + "step": 201 + }, + { + "epoch": 0.019749706687524442, + "grad_norm": 4.656231989876002, + "learning_rate": 5e-05, + "loss": 0.1882, + "num_input_tokens_seen": 19589356, + "step": 202 + }, + { + "epoch": 0.019749706687524442, + "loss": 0.1299842894077301, + "loss_ce": 0.0075325011275708675, + "loss_iou": 0.259765625, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 19589356, + "step": 202 + }, + { + "epoch": 0.019847477512710206, + "grad_norm": 4.35990468233768, + "learning_rate": 5e-05, + "loss": 0.2093, + "num_input_tokens_seen": 19687100, + "step": 203 + }, + { + "epoch": 0.019847477512710206, + "loss": 0.22325515747070312, + "loss_ce": 0.007923135533928871, + "loss_iou": 0.52734375, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 19687100, + "step": 203 + }, + { + "epoch": 0.01994524833789597, + "grad_norm": 3.9001674728992763, + "learning_rate": 5e-05, + "loss": 0.1919, + "num_input_tokens_seen": 19783676, + "step": 204 + }, + { + "epoch": 0.01994524833789597, + "loss": 0.16695955395698547, + "loss_ce": 0.012540601193904877, + "loss_iou": 0.2255859375, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 19783676, + "step": 204 + }, + { + "epoch": 0.020043019163081738, + "grad_norm": 2.709986537193714, + "learning_rate": 5e-05, + "loss": 0.2024, + "num_input_tokens_seen": 19880884, + "step": 205 + }, + { + "epoch": 0.020043019163081738, + "loss": 0.18970492482185364, + "loss_ce": 0.007087749429047108, + "loss_iou": 0.2421875, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 19880884, + "step": 205 + }, + { + "epoch": 0.020140789988267502, + "grad_norm": 3.5449393881740137, + "learning_rate": 5e-05, + "loss": 0.2121, + "num_input_tokens_seen": 19978148, + "step": 206 + }, + { + "epoch": 0.020140789988267502, + "loss": 0.23497042059898376, + "loss_ce": 0.00816377718001604, + "loss_iou": 0.302734375, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 19978148, + "step": 206 + }, + { + "epoch": 0.020238560813453266, + "grad_norm": 2.6373543725228847, + "learning_rate": 5e-05, + "loss": 0.1715, + "num_input_tokens_seen": 20075008, + "step": 207 + }, + { + "epoch": 0.020238560813453266, + "loss": 0.19786903262138367, + "loss_ce": 0.006310196593403816, + "loss_iou": 0.2080078125, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 20075008, + "step": 207 + }, + { + "epoch": 0.02033633163863903, + "grad_norm": 4.354367065508418, + "learning_rate": 5e-05, + "loss": 0.1368, + "num_input_tokens_seen": 20172880, + "step": 208 + }, + { + "epoch": 0.02033633163863903, + "loss": 0.11085684597492218, + "loss_ce": 0.00932486541569233, + "loss_iou": 0.54296875, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 20172880, + "step": 208 + }, + { + "epoch": 0.020434102463824794, + "grad_norm": 4.8514648760533845, + "learning_rate": 5e-05, + "loss": 0.1847, + "num_input_tokens_seen": 20270116, + "step": 209 + }, + { + "epoch": 0.020434102463824794, + "loss": 0.1718958467245102, + "loss_ce": 0.008779393509030342, + "loss_iou": 0.50390625, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 20270116, + "step": 209 + }, + { + "epoch": 0.020531873289010558, + "grad_norm": 7.771809872269921, + "learning_rate": 5e-05, + "loss": 0.2007, + "num_input_tokens_seen": 20368116, + "step": 210 + }, + { + "epoch": 0.020531873289010558, + "loss": 0.17879992723464966, + "loss_ce": 0.00692492350935936, + "loss_iou": 0.375, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 20368116, + "step": 210 + }, + { + "epoch": 0.020629644114196325, + "grad_norm": 4.761343504475074, + "learning_rate": 5e-05, + "loss": 0.1954, + "num_input_tokens_seen": 20464536, + "step": 211 + }, + { + "epoch": 0.020629644114196325, + "loss": 0.179729163646698, + "loss_ce": 0.006206207908689976, + "loss_iou": 0.384765625, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 20464536, + "step": 211 + }, + { + "epoch": 0.02072741493938209, + "grad_norm": 10.101523217338737, + "learning_rate": 5e-05, + "loss": 0.1662, + "num_input_tokens_seen": 20561408, + "step": 212 + }, + { + "epoch": 0.02072741493938209, + "loss": 0.13892659544944763, + "loss_ce": 0.005839441437274218, + "loss_iou": 0.37109375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 20561408, + "step": 212 + }, + { + "epoch": 0.020825185764567853, + "grad_norm": 4.177481432291454, + "learning_rate": 5e-05, + "loss": 0.2181, + "num_input_tokens_seen": 20658664, + "step": 213 + }, + { + "epoch": 0.020825185764567853, + "loss": 0.225188210606575, + "loss_ce": 0.008269257843494415, + "loss_iou": 0.3984375, + "loss_num": 0.04345703125, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 20658664, + "step": 213 + }, + { + "epoch": 0.020922956589753617, + "grad_norm": 10.021488989502544, + "learning_rate": 5e-05, + "loss": 0.2007, + "num_input_tokens_seen": 20753540, + "step": 214 + }, + { + "epoch": 0.020922956589753617, + "loss": 0.1644308716058731, + "loss_ce": 0.009645718149840832, + "loss_iou": 0.474609375, + "loss_num": 0.031005859375, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 20753540, + "step": 214 + }, + { + "epoch": 0.02102072741493938, + "grad_norm": 3.8732676243229482, + "learning_rate": 5e-05, + "loss": 0.1581, + "num_input_tokens_seen": 20850736, + "step": 215 + }, + { + "epoch": 0.02102072741493938, + "loss": 0.1574481725692749, + "loss_ce": 0.0032123371493071318, + "loss_iou": 0.4375, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 20850736, + "step": 215 + }, + { + "epoch": 0.02111849824012515, + "grad_norm": 6.069424884442629, + "learning_rate": 5e-05, + "loss": 0.2869, + "num_input_tokens_seen": 20947884, + "step": 216 + }, + { + "epoch": 0.02111849824012515, + "loss": 0.33885467052459717, + "loss_ce": 0.006640331353992224, + "loss_iou": 0.255859375, + "loss_num": 0.06640625, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 20947884, + "step": 216 + }, + { + "epoch": 0.021216269065310912, + "grad_norm": 5.6979309601753325, + "learning_rate": 5e-05, + "loss": 0.1973, + "num_input_tokens_seen": 21046128, + "step": 217 + }, + { + "epoch": 0.021216269065310912, + "loss": 0.17598581314086914, + "loss_ce": 0.004843240603804588, + "loss_iou": 0.33984375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 21046128, + "step": 217 + }, + { + "epoch": 0.021314039890496676, + "grad_norm": 4.942053652751888, + "learning_rate": 5e-05, + "loss": 0.1901, + "num_input_tokens_seen": 21142680, + "step": 218 + }, + { + "epoch": 0.021314039890496676, + "loss": 0.2225324511528015, + "loss_ce": 0.008726293221116066, + "loss_iou": 0.298828125, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 21142680, + "step": 218 + }, + { + "epoch": 0.02141181071568244, + "grad_norm": 10.263143532038141, + "learning_rate": 5e-05, + "loss": 0.171, + "num_input_tokens_seen": 21239140, + "step": 219 + }, + { + "epoch": 0.02141181071568244, + "loss": 0.1613401174545288, + "loss_ce": 0.003503195010125637, + "loss_iou": 0.2734375, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 21239140, + "step": 219 + }, + { + "epoch": 0.021509581540868204, + "grad_norm": 3.537300738488836, + "learning_rate": 5e-05, + "loss": 0.18, + "num_input_tokens_seen": 21335496, + "step": 220 + }, + { + "epoch": 0.021509581540868204, + "loss": 0.17299333214759827, + "loss_ce": 0.008442549034953117, + "loss_iou": 0.328125, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 21335496, + "step": 220 + }, + { + "epoch": 0.021607352366053968, + "grad_norm": 6.012304062314275, + "learning_rate": 5e-05, + "loss": 0.2282, + "num_input_tokens_seen": 21432232, + "step": 221 + }, + { + "epoch": 0.021607352366053968, + "loss": 0.2338802069425583, + "loss_ce": 0.012017415836453438, + "loss_iou": 0.2431640625, + "loss_num": 0.04443359375, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 21432232, + "step": 221 + }, + { + "epoch": 0.021705123191239736, + "grad_norm": 9.416389005951554, + "learning_rate": 5e-05, + "loss": 0.1284, + "num_input_tokens_seen": 21528348, + "step": 222 + }, + { + "epoch": 0.021705123191239736, + "loss": 0.1393304467201233, + "loss_ce": 0.008410033769905567, + "loss_iou": 0.35546875, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 21528348, + "step": 222 + }, + { + "epoch": 0.0218028940164255, + "grad_norm": 3.357795007490628, + "learning_rate": 5e-05, + "loss": 0.1645, + "num_input_tokens_seen": 21625032, + "step": 223 + }, + { + "epoch": 0.0218028940164255, + "loss": 0.15558961033821106, + "loss_ce": 0.010234382934868336, + "loss_iou": 0.5234375, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 21625032, + "step": 223 + }, + { + "epoch": 0.021900664841611264, + "grad_norm": 6.613894351680186, + "learning_rate": 5e-05, + "loss": 0.2757, + "num_input_tokens_seen": 21722320, + "step": 224 + }, + { + "epoch": 0.021900664841611264, + "loss": 0.32239851355552673, + "loss_ce": 0.01276713889092207, + "loss_iou": 0.353515625, + "loss_num": 0.06201171875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 21722320, + "step": 224 + }, + { + "epoch": 0.021998435666797028, + "grad_norm": 3.1247183570753125, + "learning_rate": 5e-05, + "loss": 0.1816, + "num_input_tokens_seen": 21818992, + "step": 225 + }, + { + "epoch": 0.021998435666797028, + "loss": 0.15236762166023254, + "loss_ce": 0.007439645007252693, + "loss_iou": 0.2197265625, + "loss_num": 0.029052734375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 21818992, + "step": 225 + }, + { + "epoch": 0.02209620649198279, + "grad_norm": 12.529191236105417, + "learning_rate": 5e-05, + "loss": 0.1747, + "num_input_tokens_seen": 21916532, + "step": 226 + }, + { + "epoch": 0.02209620649198279, + "loss": 0.1941104531288147, + "loss_ce": 0.004107998684048653, + "loss_iou": 0.451171875, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 21916532, + "step": 226 + }, + { + "epoch": 0.022193977317168555, + "grad_norm": 2.588409584401587, + "learning_rate": 5e-05, + "loss": 0.1999, + "num_input_tokens_seen": 22013984, + "step": 227 + }, + { + "epoch": 0.022193977317168555, + "loss": 0.17559391260147095, + "loss_ce": 0.0039630476385355, + "loss_iou": 0.251953125, + "loss_num": 0.0341796875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 22013984, + "step": 227 + }, + { + "epoch": 0.022291748142354323, + "grad_norm": 4.317239375836779, + "learning_rate": 5e-05, + "loss": 0.2021, + "num_input_tokens_seen": 22111024, + "step": 228 + }, + { + "epoch": 0.022291748142354323, + "loss": 0.2636571228504181, + "loss_ce": 0.009598283097147942, + "loss_iou": 0.2470703125, + "loss_num": 0.05078125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 22111024, + "step": 228 + }, + { + "epoch": 0.022389518967540087, + "grad_norm": 2.632517276185303, + "learning_rate": 5e-05, + "loss": 0.1789, + "num_input_tokens_seen": 22207332, + "step": 229 + }, + { + "epoch": 0.022389518967540087, + "loss": 0.1600375771522522, + "loss_ce": 0.015201141126453876, + "loss_iou": 0.294921875, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 22207332, + "step": 229 + }, + { + "epoch": 0.02248728979272585, + "grad_norm": 2.0940599454513933, + "learning_rate": 5e-05, + "loss": 0.0975, + "num_input_tokens_seen": 22304048, + "step": 230 + }, + { + "epoch": 0.02248728979272585, + "loss": 0.07457196712493896, + "loss_ce": 0.004408237524330616, + "loss_iou": 0.31640625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 22304048, + "step": 230 + }, + { + "epoch": 0.022585060617911615, + "grad_norm": 4.933353187636331, + "learning_rate": 5e-05, + "loss": 0.2019, + "num_input_tokens_seen": 22401692, + "step": 231 + }, + { + "epoch": 0.022585060617911615, + "loss": 0.1797766536474228, + "loss_ce": 0.0041174739599227905, + "loss_iou": 0.39453125, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 22401692, + "step": 231 + }, + { + "epoch": 0.02268283144309738, + "grad_norm": 4.465313883068962, + "learning_rate": 5e-05, + "loss": 0.1652, + "num_input_tokens_seen": 22498620, + "step": 232 + }, + { + "epoch": 0.02268283144309738, + "loss": 0.2205108255147934, + "loss_ce": 0.010153168812394142, + "loss_iou": 0.375, + "loss_num": 0.0419921875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 22498620, + "step": 232 + }, + { + "epoch": 0.022780602268283143, + "grad_norm": 3.128371911750791, + "learning_rate": 5e-05, + "loss": 0.1564, + "num_input_tokens_seen": 22595608, + "step": 233 + }, + { + "epoch": 0.022780602268283143, + "loss": 0.19156791269779205, + "loss_ce": 0.008096233010292053, + "loss_iou": 0.3046875, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 22595608, + "step": 233 + }, + { + "epoch": 0.02287837309346891, + "grad_norm": 4.775702467122873, + "learning_rate": 5e-05, + "loss": 0.2247, + "num_input_tokens_seen": 22692444, + "step": 234 + }, + { + "epoch": 0.02287837309346891, + "loss": 0.24678927659988403, + "loss_ce": 0.013055147603154182, + "loss_iou": 0.271484375, + "loss_num": 0.046875, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 22692444, + "step": 234 + }, + { + "epoch": 0.022976143918654674, + "grad_norm": 4.998959044089345, + "learning_rate": 5e-05, + "loss": 0.1707, + "num_input_tokens_seen": 22790184, + "step": 235 + }, + { + "epoch": 0.022976143918654674, + "loss": 0.13382020592689514, + "loss_ce": 0.005829493515193462, + "loss_iou": 0.392578125, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 22790184, + "step": 235 + }, + { + "epoch": 0.023073914743840438, + "grad_norm": 5.416326100875651, + "learning_rate": 5e-05, + "loss": 0.1212, + "num_input_tokens_seen": 22886992, + "step": 236 + }, + { + "epoch": 0.023073914743840438, + "loss": 0.12909317016601562, + "loss_ce": 0.011631015688180923, + "loss_iou": 0.2197265625, + "loss_num": 0.0234375, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 22886992, + "step": 236 + }, + { + "epoch": 0.023171685569026202, + "grad_norm": 10.843420821521505, + "learning_rate": 5e-05, + "loss": 0.17, + "num_input_tokens_seen": 22983884, + "step": 237 + }, + { + "epoch": 0.023171685569026202, + "loss": 0.18702515959739685, + "loss_ce": 0.007703881710767746, + "loss_iou": 0.265625, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 22983884, + "step": 237 + }, + { + "epoch": 0.023269456394211966, + "grad_norm": 3.8316013066434476, + "learning_rate": 5e-05, + "loss": 0.1571, + "num_input_tokens_seen": 23081072, + "step": 238 + }, + { + "epoch": 0.023269456394211966, + "loss": 0.1587757170200348, + "loss_ce": 0.004784016869962215, + "loss_iou": 0.431640625, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 23081072, + "step": 238 + }, + { + "epoch": 0.023367227219397733, + "grad_norm": 4.840383804483827, + "learning_rate": 5e-05, + "loss": 0.2374, + "num_input_tokens_seen": 23177564, + "step": 239 + }, + { + "epoch": 0.023367227219397733, + "loss": 0.3110654950141907, + "loss_ce": 0.005401449743658304, + "loss_iou": 0.267578125, + "loss_num": 0.06103515625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 23177564, + "step": 239 + }, + { + "epoch": 0.023464998044583497, + "grad_norm": 4.245824153620534, + "learning_rate": 5e-05, + "loss": 0.2035, + "num_input_tokens_seen": 23275024, + "step": 240 + }, + { + "epoch": 0.023464998044583497, + "loss": 0.18091849982738495, + "loss_ce": 0.006418989971280098, + "loss_iou": 0.19921875, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 23275024, + "step": 240 + }, + { + "epoch": 0.02356276886976926, + "grad_norm": 6.534401866264797, + "learning_rate": 5e-05, + "loss": 0.1625, + "num_input_tokens_seen": 23372968, + "step": 241 + }, + { + "epoch": 0.02356276886976926, + "loss": 0.1986764669418335, + "loss_ce": 0.008124705404043198, + "loss_iou": 0.470703125, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 23372968, + "step": 241 + }, + { + "epoch": 0.023660539694955025, + "grad_norm": 6.131016970075904, + "learning_rate": 5e-05, + "loss": 0.1473, + "num_input_tokens_seen": 23468732, + "step": 242 + }, + { + "epoch": 0.023660539694955025, + "loss": 0.16628943383693695, + "loss_ce": 0.0041953157633543015, + "loss_iou": 0.33203125, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 23468732, + "step": 242 + }, + { + "epoch": 0.02375831052014079, + "grad_norm": 5.9027790680845635, + "learning_rate": 5e-05, + "loss": 0.1984, + "num_input_tokens_seen": 23565492, + "step": 243 + }, + { + "epoch": 0.02375831052014079, + "loss": 0.16144300997257233, + "loss_ce": 0.006047505885362625, + "loss_iou": 0.259765625, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 23565492, + "step": 243 + }, + { + "epoch": 0.023856081345326553, + "grad_norm": 3.4280668264405394, + "learning_rate": 5e-05, + "loss": 0.1886, + "num_input_tokens_seen": 23662788, + "step": 244 + }, + { + "epoch": 0.023856081345326553, + "loss": 0.1738182008266449, + "loss_ce": 0.0071617113426327705, + "loss_iou": 0.361328125, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 23662788, + "step": 244 + }, + { + "epoch": 0.02395385217051232, + "grad_norm": 4.9991104176159915, + "learning_rate": 5e-05, + "loss": 0.1376, + "num_input_tokens_seen": 23759904, + "step": 245 + }, + { + "epoch": 0.02395385217051232, + "loss": 0.10628046095371246, + "loss_ce": 0.004290717653930187, + "loss_iou": 0.5078125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 23759904, + "step": 245 + }, + { + "epoch": 0.024051622995698085, + "grad_norm": 21.434177141825504, + "learning_rate": 5e-05, + "loss": 0.1882, + "num_input_tokens_seen": 23857424, + "step": 246 + }, + { + "epoch": 0.024051622995698085, + "loss": 0.19310057163238525, + "loss_ce": 0.002609857125207782, + "loss_iou": 0.5625, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 23857424, + "step": 246 + }, + { + "epoch": 0.02414939382088385, + "grad_norm": 8.971046136835202, + "learning_rate": 5e-05, + "loss": 0.2175, + "num_input_tokens_seen": 23954436, + "step": 247 + }, + { + "epoch": 0.02414939382088385, + "loss": 0.17910578846931458, + "loss_ce": 0.006559406872838736, + "loss_iou": 0.2216796875, + "loss_num": 0.034423828125, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 23954436, + "step": 247 + }, + { + "epoch": 0.024247164646069613, + "grad_norm": 8.870987593350991, + "learning_rate": 5e-05, + "loss": 0.1827, + "num_input_tokens_seen": 24050816, + "step": 248 + }, + { + "epoch": 0.024247164646069613, + "loss": 0.15703310072422028, + "loss_ce": 0.010884422808885574, + "loss_iou": 0.2216796875, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 24050816, + "step": 248 + }, + { + "epoch": 0.024344935471255377, + "grad_norm": 2.698299738307344, + "learning_rate": 5e-05, + "loss": 0.1708, + "num_input_tokens_seen": 24146648, + "step": 249 + }, + { + "epoch": 0.024344935471255377, + "loss": 0.19299691915512085, + "loss_ce": 0.0059546856209635735, + "loss_iou": 0.318359375, + "loss_num": 0.037353515625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 24146648, + "step": 249 + }, + { + "epoch": 0.02444270629644114, + "grad_norm": 1.9007817850390105, + "learning_rate": 5e-05, + "loss": 0.1719, + "num_input_tokens_seen": 24244240, + "step": 250 + }, + { + "epoch": 0.02444270629644114, + "eval_seeclick_CIoU": 0.3433030694723129, + "eval_seeclick_GIoU": 0.32753509283065796, + "eval_seeclick_IoU": 0.41665318608283997, + "eval_seeclick_MAE_all": 0.10997363924980164, + "eval_seeclick_MAE_h": 0.05114497244358063, + "eval_seeclick_MAE_w": 0.1677113026380539, + "eval_seeclick_MAE_x": 0.16648425161838531, + "eval_seeclick_MAE_y": 0.05455404706299305, + "eval_seeclick_NUM_probability": 0.9961552619934082, + "eval_seeclick_inside_bbox": 0.7230113744735718, + "eval_seeclick_loss": 0.38185954093933105, + "eval_seeclick_loss_ce": 0.01963688898831606, + "eval_seeclick_loss_iou": 0.510986328125, + "eval_seeclick_loss_num": 0.0682220458984375, + "eval_seeclick_loss_xval": 0.34088134765625, + "eval_seeclick_runtime": 74.825, + "eval_seeclick_samples_per_second": 0.575, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 24244240, + "step": 250 + }, + { + "epoch": 0.02444270629644114, + "eval_icons_CIoU": 0.58465576171875, + "eval_icons_GIoU": 0.5765199959278107, + "eval_icons_IoU": 0.618288516998291, + "eval_icons_MAE_all": 0.061851900070905685, + "eval_icons_MAE_h": 0.07860374078154564, + "eval_icons_MAE_w": 0.04607979953289032, + "eval_icons_MAE_x": 0.04718155972659588, + "eval_icons_MAE_y": 0.0755424965173006, + "eval_icons_NUM_probability": 0.9954328835010529, + "eval_icons_inside_bbox": 0.8506944477558136, + "eval_icons_loss": 0.21478092670440674, + "eval_icons_loss_ce": 0.00171481363940984, + "eval_icons_loss_iou": 0.4417724609375, + "eval_icons_loss_num": 0.04560089111328125, + "eval_icons_loss_xval": 0.227813720703125, + "eval_icons_runtime": 86.758, + "eval_icons_samples_per_second": 0.576, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 24244240, + "step": 250 + }, + { + "epoch": 0.02444270629644114, + "eval_screenspot_CIoU": 0.3591566781202952, + "eval_screenspot_GIoU": 0.35421378413836163, + "eval_screenspot_IoU": 0.42629826068878174, + "eval_screenspot_MAE_all": 0.12392568588256836, + "eval_screenspot_MAE_h": 0.09747755279143651, + "eval_screenspot_MAE_w": 0.1602166692415873, + "eval_screenspot_MAE_x": 0.15167777240276337, + "eval_screenspot_MAE_y": 0.0863307515780131, + "eval_screenspot_NUM_probability": 0.9960681994756063, + "eval_screenspot_inside_bbox": 0.7237499952316284, + "eval_screenspot_loss": 0.49942782521247864, + "eval_screenspot_loss_ce": 0.030636516710122425, + "eval_screenspot_loss_iou": 0.4984944661458333, + "eval_screenspot_loss_num": 0.09144083658854167, + "eval_screenspot_loss_xval": 0.4572347005208333, + "eval_screenspot_runtime": 149.5796, + "eval_screenspot_samples_per_second": 0.595, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 24244240, + "step": 250 + }, + { + "epoch": 0.02444270629644114, + "eval_compot_CIoU": 0.39355748891830444, + "eval_compot_GIoU": 0.3688967376947403, + "eval_compot_IoU": 0.48084449768066406, + "eval_compot_MAE_all": 0.12562128528952599, + "eval_compot_MAE_h": 0.08327788859605789, + "eval_compot_MAE_w": 0.1743333712220192, + "eval_compot_MAE_x": 0.162967249751091, + "eval_compot_MAE_y": 0.08190663531422615, + "eval_compot_NUM_probability": 0.9953396320343018, + "eval_compot_inside_bbox": 0.7083333432674408, + "eval_compot_loss": 0.38941848278045654, + "eval_compot_loss_ce": 0.02479570358991623, + "eval_compot_loss_iou": 0.422607421875, + "eval_compot_loss_num": 0.0668487548828125, + "eval_compot_loss_xval": 0.33392333984375, + "eval_compot_runtime": 85.1113, + "eval_compot_samples_per_second": 0.587, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 24244240, + "step": 250 + }, + { + "epoch": 0.02444270629644114, + "loss": 0.3528437614440918, + "loss_ce": 0.0303950235247612, + "loss_iou": 0.431640625, + "loss_num": 0.064453125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 24244240, + "step": 250 + }, + { + "epoch": 0.024540477121626908, + "grad_norm": 8.797499929797482, + "learning_rate": 5e-05, + "loss": 0.2346, + "num_input_tokens_seen": 24341892, + "step": 251 + }, + { + "epoch": 0.024540477121626908, + "loss": 0.21230831742286682, + "loss_ce": 0.006436745636165142, + "loss_iou": 0.40234375, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 24341892, + "step": 251 + }, + { + "epoch": 0.024638247946812672, + "grad_norm": 4.565518679392026, + "learning_rate": 5e-05, + "loss": 0.3179, + "num_input_tokens_seen": 24438588, + "step": 252 + }, + { + "epoch": 0.024638247946812672, + "loss": 0.3111049234867096, + "loss_ce": 0.01191058848053217, + "loss_iou": 0.43359375, + "loss_num": 0.059814453125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 24438588, + "step": 252 + }, + { + "epoch": 0.024736018771998436, + "grad_norm": 22.291557810991154, + "learning_rate": 5e-05, + "loss": 0.1458, + "num_input_tokens_seen": 24536188, + "step": 253 + }, + { + "epoch": 0.024736018771998436, + "loss": 0.1356201469898224, + "loss_ce": 0.004852320998907089, + "loss_iou": 0.5234375, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 24536188, + "step": 253 + }, + { + "epoch": 0.0248337895971842, + "grad_norm": 3.7794301271678665, + "learning_rate": 5e-05, + "loss": 0.2549, + "num_input_tokens_seen": 24632672, + "step": 254 + }, + { + "epoch": 0.0248337895971842, + "loss": 0.2623138427734375, + "loss_ce": 0.009017959237098694, + "loss_iou": 0.4609375, + "loss_num": 0.05078125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 24632672, + "step": 254 + }, + { + "epoch": 0.024931560422369964, + "grad_norm": 5.073163090012399, + "learning_rate": 5e-05, + "loss": 0.1179, + "num_input_tokens_seen": 24729364, + "step": 255 + }, + { + "epoch": 0.024931560422369964, + "loss": 0.09428396821022034, + "loss_ce": 0.009292513132095337, + "loss_iou": 0.37109375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 24729364, + "step": 255 + }, + { + "epoch": 0.025029331247555728, + "grad_norm": 5.423484561456401, + "learning_rate": 5e-05, + "loss": 0.2817, + "num_input_tokens_seen": 24826084, + "step": 256 + }, + { + "epoch": 0.025029331247555728, + "loss": 0.2860683798789978, + "loss_ce": 0.009335001930594444, + "loss_iou": 0.4140625, + "loss_num": 0.055419921875, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 24826084, + "step": 256 + }, + { + "epoch": 0.025127102072741495, + "grad_norm": 3.977936103794145, + "learning_rate": 5e-05, + "loss": 0.1739, + "num_input_tokens_seen": 24922744, + "step": 257 + }, + { + "epoch": 0.025127102072741495, + "loss": 0.22428329288959503, + "loss_ce": 0.0033970584627240896, + "loss_iou": 0.41015625, + "loss_num": 0.044189453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 24922744, + "step": 257 + }, + { + "epoch": 0.02522487289792726, + "grad_norm": 7.066094030770507, + "learning_rate": 5e-05, + "loss": 0.2202, + "num_input_tokens_seen": 25020004, + "step": 258 + }, + { + "epoch": 0.02522487289792726, + "loss": 0.25914081931114197, + "loss_ce": 0.00840838998556137, + "loss_iou": 0.546875, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 25020004, + "step": 258 + }, + { + "epoch": 0.025322643723113023, + "grad_norm": 2.5785893675171274, + "learning_rate": 5e-05, + "loss": 0.2252, + "num_input_tokens_seen": 25117836, + "step": 259 + }, + { + "epoch": 0.025322643723113023, + "loss": 0.2472132444381714, + "loss_ce": 0.007161975372582674, + "loss_iou": 0.3671875, + "loss_num": 0.0478515625, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 25117836, + "step": 259 + }, + { + "epoch": 0.025420414548298787, + "grad_norm": 6.059619395504322, + "learning_rate": 5e-05, + "loss": 0.186, + "num_input_tokens_seen": 25216324, + "step": 260 + }, + { + "epoch": 0.025420414548298787, + "loss": 0.16966035962104797, + "loss_ce": 0.007550983689725399, + "loss_iou": 0.359375, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 25216324, + "step": 260 + }, + { + "epoch": 0.02551818537348455, + "grad_norm": 15.09896066351231, + "learning_rate": 5e-05, + "loss": 0.1608, + "num_input_tokens_seen": 25314332, + "step": 261 + }, + { + "epoch": 0.02551818537348455, + "loss": 0.17233175039291382, + "loss_ce": 0.008604929782450199, + "loss_iou": 0.37109375, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 25314332, + "step": 261 + }, + { + "epoch": 0.025615956198670315, + "grad_norm": 3.678253037858367, + "learning_rate": 5e-05, + "loss": 0.2135, + "num_input_tokens_seen": 25411276, + "step": 262 + }, + { + "epoch": 0.025615956198670315, + "loss": 0.26012924313545227, + "loss_ce": 0.007840425707399845, + "loss_iou": 0.40625, + "loss_num": 0.050537109375, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 25411276, + "step": 262 + }, + { + "epoch": 0.025713727023856082, + "grad_norm": 9.180496904202482, + "learning_rate": 5e-05, + "loss": 0.2664, + "num_input_tokens_seen": 25508700, + "step": 263 + }, + { + "epoch": 0.025713727023856082, + "loss": 0.23047325015068054, + "loss_ce": 0.007939070463180542, + "loss_iou": 0.435546875, + "loss_num": 0.04443359375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 25508700, + "step": 263 + }, + { + "epoch": 0.025811497849041846, + "grad_norm": 18.062816176961594, + "learning_rate": 5e-05, + "loss": 0.2321, + "num_input_tokens_seen": 25605796, + "step": 264 + }, + { + "epoch": 0.025811497849041846, + "loss": 0.23208454251289368, + "loss_ce": 0.004057187121361494, + "loss_iou": 0.314453125, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 25605796, + "step": 264 + }, + { + "epoch": 0.02590926867422761, + "grad_norm": 3.394648751146387, + "learning_rate": 5e-05, + "loss": 0.2163, + "num_input_tokens_seen": 25702740, + "step": 265 + }, + { + "epoch": 0.02590926867422761, + "loss": 0.28587067127227783, + "loss_ce": 0.007245165295898914, + "loss_iou": 0.35546875, + "loss_num": 0.055908203125, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 25702740, + "step": 265 + }, + { + "epoch": 0.026007039499413374, + "grad_norm": 2.917133670180123, + "learning_rate": 5e-05, + "loss": 0.1419, + "num_input_tokens_seen": 25799056, + "step": 266 + }, + { + "epoch": 0.026007039499413374, + "loss": 0.10784617066383362, + "loss_ce": 0.00793162640184164, + "loss_iou": 0.38671875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 25799056, + "step": 266 + }, + { + "epoch": 0.02610481032459914, + "grad_norm": 8.418829971140365, + "learning_rate": 5e-05, + "loss": 0.1558, + "num_input_tokens_seen": 25897260, + "step": 267 + }, + { + "epoch": 0.02610481032459914, + "loss": 0.13427573442459106, + "loss_ce": 0.0015853065997362137, + "loss_iou": 0.482421875, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 25897260, + "step": 267 + }, + { + "epoch": 0.026202581149784906, + "grad_norm": 4.404442447871286, + "learning_rate": 5e-05, + "loss": 0.1714, + "num_input_tokens_seen": 25994020, + "step": 268 + }, + { + "epoch": 0.026202581149784906, + "loss": 0.13735496997833252, + "loss_ce": 0.010218733921647072, + "loss_iou": 0.3203125, + "loss_num": 0.0255126953125, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 25994020, + "step": 268 + }, + { + "epoch": 0.02630035197497067, + "grad_norm": 5.1828723728419925, + "learning_rate": 5e-05, + "loss": 0.2316, + "num_input_tokens_seen": 26091644, + "step": 269 + }, + { + "epoch": 0.02630035197497067, + "loss": 0.1421690136194229, + "loss_ce": 0.009173416532576084, + "loss_iou": 0.244140625, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 26091644, + "step": 269 + }, + { + "epoch": 0.026398122800156434, + "grad_norm": 9.771450959908082, + "learning_rate": 5e-05, + "loss": 0.2429, + "num_input_tokens_seen": 26188684, + "step": 270 + }, + { + "epoch": 0.026398122800156434, + "loss": 0.1916218250989914, + "loss_ce": 0.008577396161854267, + "loss_iou": 0.421875, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 26188684, + "step": 270 + }, + { + "epoch": 0.026495893625342198, + "grad_norm": 5.552060725258776, + "learning_rate": 5e-05, + "loss": 0.1678, + "num_input_tokens_seen": 26286192, + "step": 271 + }, + { + "epoch": 0.026495893625342198, + "loss": 0.14722752571105957, + "loss_ce": 0.0024521355517208576, + "loss_iou": 0.46875, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 26286192, + "step": 271 + }, + { + "epoch": 0.02659366445052796, + "grad_norm": 4.08696092650174, + "learning_rate": 5e-05, + "loss": 0.1993, + "num_input_tokens_seen": 26382508, + "step": 272 + }, + { + "epoch": 0.02659366445052796, + "loss": 0.18536940217018127, + "loss_ce": 0.005620861425995827, + "loss_iou": 0.267578125, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 26382508, + "step": 272 + }, + { + "epoch": 0.026691435275713726, + "grad_norm": 4.784589510067373, + "learning_rate": 5e-05, + "loss": 0.2004, + "num_input_tokens_seen": 26479760, + "step": 273 + }, + { + "epoch": 0.026691435275713726, + "loss": 0.22895173728466034, + "loss_ce": 0.010049143806099892, + "loss_iou": 0.39453125, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 26479760, + "step": 273 + }, + { + "epoch": 0.026789206100899493, + "grad_norm": 2.5377403642231267, + "learning_rate": 5e-05, + "loss": 0.106, + "num_input_tokens_seen": 26576624, + "step": 274 + }, + { + "epoch": 0.026789206100899493, + "loss": 0.1483156681060791, + "loss_ce": 0.0080568827688694, + "loss_iou": 0.37109375, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 26576624, + "step": 274 + }, + { + "epoch": 0.026886976926085257, + "grad_norm": 6.311326992930466, + "learning_rate": 5e-05, + "loss": 0.1774, + "num_input_tokens_seen": 26673452, + "step": 275 + }, + { + "epoch": 0.026886976926085257, + "loss": 0.1865537315607071, + "loss_ce": 0.007888568565249443, + "loss_iou": 0.40625, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 26673452, + "step": 275 + }, + { + "epoch": 0.02698474775127102, + "grad_norm": 3.5194547700896432, + "learning_rate": 5e-05, + "loss": 0.2022, + "num_input_tokens_seen": 26770380, + "step": 276 + }, + { + "epoch": 0.02698474775127102, + "loss": 0.19467481970787048, + "loss_ce": 0.0042756544426083565, + "loss_iou": 0.396484375, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 26770380, + "step": 276 + }, + { + "epoch": 0.027082518576456785, + "grad_norm": 3.227069567153899, + "learning_rate": 5e-05, + "loss": 0.2084, + "num_input_tokens_seen": 26867544, + "step": 277 + }, + { + "epoch": 0.027082518576456785, + "loss": 0.1594870686531067, + "loss_ce": 0.00844032783061266, + "loss_iou": 0.283203125, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 26867544, + "step": 277 + }, + { + "epoch": 0.02718028940164255, + "grad_norm": 2.52470753377071, + "learning_rate": 5e-05, + "loss": 0.1301, + "num_input_tokens_seen": 26963736, + "step": 278 + }, + { + "epoch": 0.02718028940164255, + "loss": 0.1477309763431549, + "loss_ce": 0.005549577996134758, + "loss_iou": 0.138671875, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 26963736, + "step": 278 + }, + { + "epoch": 0.027278060226828313, + "grad_norm": 13.721399636252622, + "learning_rate": 5e-05, + "loss": 0.1722, + "num_input_tokens_seen": 27061100, + "step": 279 + }, + { + "epoch": 0.027278060226828313, + "loss": 0.19630759954452515, + "loss_ce": 0.010760722681879997, + "loss_iou": 0.40234375, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 27061100, + "step": 279 + }, + { + "epoch": 0.02737583105201408, + "grad_norm": 3.7221811594421954, + "learning_rate": 5e-05, + "loss": 0.202, + "num_input_tokens_seen": 27158448, + "step": 280 + }, + { + "epoch": 0.02737583105201408, + "loss": 0.18796461820602417, + "loss_ce": 0.0053474134765565395, + "loss_iou": 0.34375, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 27158448, + "step": 280 + }, + { + "epoch": 0.027473601877199844, + "grad_norm": 3.679421766678048, + "learning_rate": 5e-05, + "loss": 0.1134, + "num_input_tokens_seen": 27254608, + "step": 281 + }, + { + "epoch": 0.027473601877199844, + "loss": 0.12313250452280045, + "loss_ce": 0.01233843993395567, + "loss_iou": 0.287109375, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 27254608, + "step": 281 + }, + { + "epoch": 0.027571372702385608, + "grad_norm": 4.641950503273771, + "learning_rate": 5e-05, + "loss": 0.1827, + "num_input_tokens_seen": 27351428, + "step": 282 + }, + { + "epoch": 0.027571372702385608, + "loss": 0.17282778024673462, + "loss_ce": 0.011755996383726597, + "loss_iou": 0.498046875, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 27351428, + "step": 282 + }, + { + "epoch": 0.027669143527571372, + "grad_norm": 6.912874950675265, + "learning_rate": 5e-05, + "loss": 0.2301, + "num_input_tokens_seen": 27448784, + "step": 283 + }, + { + "epoch": 0.027669143527571372, + "loss": 0.26931625604629517, + "loss_ce": 0.010405102744698524, + "loss_iou": 0.1787109375, + "loss_num": 0.0517578125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 27448784, + "step": 283 + }, + { + "epoch": 0.027766914352757136, + "grad_norm": 3.634787125739518, + "learning_rate": 5e-05, + "loss": 0.1243, + "num_input_tokens_seen": 27545732, + "step": 284 + }, + { + "epoch": 0.027766914352757136, + "loss": 0.1355867087841034, + "loss_ce": 0.010556195862591267, + "loss_iou": 0.279296875, + "loss_num": 0.02490234375, + "loss_xval": 0.125, + "num_input_tokens_seen": 27545732, + "step": 284 + }, + { + "epoch": 0.0278646851779429, + "grad_norm": 3.367685533898466, + "learning_rate": 5e-05, + "loss": 0.1754, + "num_input_tokens_seen": 27642492, + "step": 285 + }, + { + "epoch": 0.0278646851779429, + "loss": 0.1872805505990982, + "loss_ce": 0.007074255961924791, + "loss_iou": 0.4296875, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 27642492, + "step": 285 + }, + { + "epoch": 0.027962456003128668, + "grad_norm": 5.793417427705647, + "learning_rate": 5e-05, + "loss": 0.2129, + "num_input_tokens_seen": 27739016, + "step": 286 + }, + { + "epoch": 0.027962456003128668, + "loss": 0.2246587723493576, + "loss_ce": 0.00635507982224226, + "loss_iou": 0.326171875, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 27739016, + "step": 286 + }, + { + "epoch": 0.02806022682831443, + "grad_norm": 4.452151674518816, + "learning_rate": 5e-05, + "loss": 0.1628, + "num_input_tokens_seen": 27836080, + "step": 287 + }, + { + "epoch": 0.02806022682831443, + "loss": 0.12788155674934387, + "loss_ce": 0.0073066167533397675, + "loss_iou": 0.431640625, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 27836080, + "step": 287 + }, + { + "epoch": 0.028157997653500196, + "grad_norm": 3.587280579680754, + "learning_rate": 5e-05, + "loss": 0.1837, + "num_input_tokens_seen": 27932944, + "step": 288 + }, + { + "epoch": 0.028157997653500196, + "loss": 0.1637641191482544, + "loss_ce": 0.006583323236554861, + "loss_iou": 0.228515625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 27932944, + "step": 288 + }, + { + "epoch": 0.02825576847868596, + "grad_norm": 2.9670575571730238, + "learning_rate": 5e-05, + "loss": 0.2075, + "num_input_tokens_seen": 28031456, + "step": 289 + }, + { + "epoch": 0.02825576847868596, + "loss": 0.16774097084999084, + "loss_ce": 0.005509533919394016, + "loss_iou": 0.345703125, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 28031456, + "step": 289 + }, + { + "epoch": 0.028353539303871723, + "grad_norm": 3.1755248456906044, + "learning_rate": 5e-05, + "loss": 0.13, + "num_input_tokens_seen": 28128128, + "step": 290 + }, + { + "epoch": 0.028353539303871723, + "loss": 0.09320749342441559, + "loss_ce": 0.0065375687554478645, + "loss_iou": 0.287109375, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 28128128, + "step": 290 + }, + { + "epoch": 0.02845131012905749, + "grad_norm": 3.1866797201683967, + "learning_rate": 5e-05, + "loss": 0.1434, + "num_input_tokens_seen": 28225156, + "step": 291 + }, + { + "epoch": 0.02845131012905749, + "loss": 0.14454537630081177, + "loss_ce": 0.008467486128211021, + "loss_iou": 0.2578125, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 28225156, + "step": 291 + }, + { + "epoch": 0.028549080954243255, + "grad_norm": 4.635021530350107, + "learning_rate": 5e-05, + "loss": 0.1373, + "num_input_tokens_seen": 28322504, + "step": 292 + }, + { + "epoch": 0.028549080954243255, + "loss": 0.12356653809547424, + "loss_ce": 0.009034059941768646, + "loss_iou": 0.330078125, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 28322504, + "step": 292 + }, + { + "epoch": 0.02864685177942902, + "grad_norm": 3.115213659000646, + "learning_rate": 5e-05, + "loss": 0.1738, + "num_input_tokens_seen": 28419224, + "step": 293 + }, + { + "epoch": 0.02864685177942902, + "loss": 0.13375765085220337, + "loss_ce": 0.010375093668699265, + "loss_iou": 0.419921875, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 28419224, + "step": 293 + }, + { + "epoch": 0.028744622604614783, + "grad_norm": 14.732932681106165, + "learning_rate": 5e-05, + "loss": 0.1737, + "num_input_tokens_seen": 28516136, + "step": 294 + }, + { + "epoch": 0.028744622604614783, + "loss": 0.1393183171749115, + "loss_ce": 0.008031698875129223, + "loss_iou": 0.453125, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 28516136, + "step": 294 + }, + { + "epoch": 0.028842393429800547, + "grad_norm": 5.433319702245676, + "learning_rate": 5e-05, + "loss": 0.1769, + "num_input_tokens_seen": 28613604, + "step": 295 + }, + { + "epoch": 0.028842393429800547, + "loss": 0.1717189997434616, + "loss_ce": 0.008083745837211609, + "loss_iou": 0.3515625, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 28613604, + "step": 295 + }, + { + "epoch": 0.02894016425498631, + "grad_norm": 5.924554247337379, + "learning_rate": 5e-05, + "loss": 0.2058, + "num_input_tokens_seen": 28710320, + "step": 296 + }, + { + "epoch": 0.02894016425498631, + "loss": 0.23196996748447418, + "loss_ce": 0.008703356608748436, + "loss_iou": 0.5703125, + "loss_num": 0.044677734375, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 28710320, + "step": 296 + }, + { + "epoch": 0.029037935080172078, + "grad_norm": 3.697533472259469, + "learning_rate": 5e-05, + "loss": 0.1632, + "num_input_tokens_seen": 28808076, + "step": 297 + }, + { + "epoch": 0.029037935080172078, + "loss": 0.19110259413719177, + "loss_ce": 0.005921920761466026, + "loss_iou": 0.490234375, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 28808076, + "step": 297 + }, + { + "epoch": 0.029135705905357842, + "grad_norm": 3.3617269538378722, + "learning_rate": 5e-05, + "loss": 0.1559, + "num_input_tokens_seen": 28905228, + "step": 298 + }, + { + "epoch": 0.029135705905357842, + "loss": 0.15876883268356323, + "loss_ce": 0.00904960185289383, + "loss_iou": 0.33984375, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 28905228, + "step": 298 + }, + { + "epoch": 0.029233476730543606, + "grad_norm": 4.254741710242712, + "learning_rate": 5e-05, + "loss": 0.2416, + "num_input_tokens_seen": 29001620, + "step": 299 + }, + { + "epoch": 0.029233476730543606, + "loss": 0.2122098058462143, + "loss_ce": 0.004812339786440134, + "loss_iou": 0.400390625, + "loss_num": 0.04150390625, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 29001620, + "step": 299 + }, + { + "epoch": 0.02933124755572937, + "grad_norm": 3.8773319599343776, + "learning_rate": 5e-05, + "loss": 0.2352, + "num_input_tokens_seen": 29099252, + "step": 300 + }, + { + "epoch": 0.02933124755572937, + "loss": 0.19704414904117584, + "loss_ce": 0.0076978327706456184, + "loss_iou": 0.376953125, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 29099252, + "step": 300 + }, + { + "epoch": 0.029429018380915134, + "grad_norm": 3.354038780175972, + "learning_rate": 5e-05, + "loss": 0.1487, + "num_input_tokens_seen": 29196040, + "step": 301 + }, + { + "epoch": 0.029429018380915134, + "loss": 0.18631038069725037, + "loss_ce": 0.007980910129845142, + "loss_iou": 0.419921875, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 29196040, + "step": 301 + }, + { + "epoch": 0.029526789206100898, + "grad_norm": 5.538395540094714, + "learning_rate": 5e-05, + "loss": 0.2218, + "num_input_tokens_seen": 29293348, + "step": 302 + }, + { + "epoch": 0.029526789206100898, + "loss": 0.18337316811084747, + "loss_ce": 0.007530874572694302, + "loss_iou": 0.41015625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 29293348, + "step": 302 + }, + { + "epoch": 0.029624560031286665, + "grad_norm": 3.9844289633866037, + "learning_rate": 5e-05, + "loss": 0.2041, + "num_input_tokens_seen": 29389876, + "step": 303 + }, + { + "epoch": 0.029624560031286665, + "loss": 0.20983096957206726, + "loss_ce": 0.007560454308986664, + "loss_iou": 0.2890625, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 29389876, + "step": 303 + }, + { + "epoch": 0.02972233085647243, + "grad_norm": 3.4991378372327637, + "learning_rate": 5e-05, + "loss": 0.2258, + "num_input_tokens_seen": 29485932, + "step": 304 + }, + { + "epoch": 0.02972233085647243, + "loss": 0.17241480946540833, + "loss_ce": 0.009756125509738922, + "loss_iou": 0.162109375, + "loss_num": 0.032470703125, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 29485932, + "step": 304 + }, + { + "epoch": 0.029820101681658193, + "grad_norm": 6.081726214746984, + "learning_rate": 5e-05, + "loss": 0.1668, + "num_input_tokens_seen": 29581940, + "step": 305 + }, + { + "epoch": 0.029820101681658193, + "loss": 0.1973535120487213, + "loss_ce": 0.007961426861584187, + "loss_iou": 0.43359375, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 29581940, + "step": 305 + }, + { + "epoch": 0.029917872506843957, + "grad_norm": 3.071029735992362, + "learning_rate": 5e-05, + "loss": 0.2089, + "num_input_tokens_seen": 29679660, + "step": 306 + }, + { + "epoch": 0.029917872506843957, + "loss": 0.1848706156015396, + "loss_ce": 0.0055188145488500595, + "loss_iou": 0.42578125, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 29679660, + "step": 306 + }, + { + "epoch": 0.03001564333202972, + "grad_norm": 3.5074066421691508, + "learning_rate": 5e-05, + "loss": 0.2059, + "num_input_tokens_seen": 29776800, + "step": 307 + }, + { + "epoch": 0.03001564333202972, + "loss": 0.11976480484008789, + "loss_ce": 0.0049576712772250175, + "loss_iou": 0.4921875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 29776800, + "step": 307 + }, + { + "epoch": 0.030113414157215485, + "grad_norm": 3.471184490754876, + "learning_rate": 5e-05, + "loss": 0.2063, + "num_input_tokens_seen": 29873628, + "step": 308 + }, + { + "epoch": 0.030113414157215485, + "loss": 0.19691242277622223, + "loss_ce": 0.0054756514728069305, + "loss_iou": 0.34765625, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 29873628, + "step": 308 + }, + { + "epoch": 0.030211184982401253, + "grad_norm": 4.3963936663930205, + "learning_rate": 5e-05, + "loss": 0.1427, + "num_input_tokens_seen": 29970552, + "step": 309 + }, + { + "epoch": 0.030211184982401253, + "loss": 0.14975596964359283, + "loss_ce": 0.003088481957092881, + "loss_iou": 0.4296875, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 29970552, + "step": 309 + }, + { + "epoch": 0.030308955807587017, + "grad_norm": 8.994319312985862, + "learning_rate": 5e-05, + "loss": 0.1523, + "num_input_tokens_seen": 30068076, + "step": 310 + }, + { + "epoch": 0.030308955807587017, + "loss": 0.15440887212753296, + "loss_ce": 0.004407340660691261, + "loss_iou": 0.318359375, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 30068076, + "step": 310 + }, + { + "epoch": 0.03040672663277278, + "grad_norm": 19.65487908910161, + "learning_rate": 5e-05, + "loss": 0.2187, + "num_input_tokens_seen": 30165388, + "step": 311 + }, + { + "epoch": 0.03040672663277278, + "loss": 0.24466809630393982, + "loss_ce": 0.008004285395145416, + "loss_iou": 0.2392578125, + "loss_num": 0.04736328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 30165388, + "step": 311 + }, + { + "epoch": 0.030504497457958545, + "grad_norm": 4.063264025052342, + "learning_rate": 5e-05, + "loss": 0.2305, + "num_input_tokens_seen": 30262264, + "step": 312 + }, + { + "epoch": 0.030504497457958545, + "loss": 0.199422687292099, + "loss_ce": 0.007589180953800678, + "loss_iou": 0.453125, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 30262264, + "step": 312 + }, + { + "epoch": 0.03060226828314431, + "grad_norm": 3.400062512359794, + "learning_rate": 5e-05, + "loss": 0.1916, + "num_input_tokens_seen": 30359056, + "step": 313 + }, + { + "epoch": 0.03060226828314431, + "loss": 0.20363235473632812, + "loss_ce": 0.010364541783928871, + "loss_iou": 0.390625, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 30359056, + "step": 313 + }, + { + "epoch": 0.030700039108330076, + "grad_norm": 5.447693503709613, + "learning_rate": 5e-05, + "loss": 0.1364, + "num_input_tokens_seen": 30455856, + "step": 314 + }, + { + "epoch": 0.030700039108330076, + "loss": 0.17199884355068207, + "loss_ce": 0.003846979234367609, + "loss_iou": 0.36328125, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 30455856, + "step": 314 + }, + { + "epoch": 0.03079780993351584, + "grad_norm": 4.55955871822672, + "learning_rate": 5e-05, + "loss": 0.2539, + "num_input_tokens_seen": 30552696, + "step": 315 + }, + { + "epoch": 0.03079780993351584, + "loss": 0.22722306847572327, + "loss_ce": 0.011830000206828117, + "loss_iou": 0.3515625, + "loss_num": 0.04296875, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 30552696, + "step": 315 + }, + { + "epoch": 0.030895580758701604, + "grad_norm": 3.6431603247886035, + "learning_rate": 5e-05, + "loss": 0.1899, + "num_input_tokens_seen": 30650476, + "step": 316 + }, + { + "epoch": 0.030895580758701604, + "loss": 0.19650548696517944, + "loss_ce": 0.013522102497518063, + "loss_iou": 0.37890625, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 30650476, + "step": 316 + }, + { + "epoch": 0.030993351583887368, + "grad_norm": 4.468146369164339, + "learning_rate": 5e-05, + "loss": 0.2431, + "num_input_tokens_seen": 30747840, + "step": 317 + }, + { + "epoch": 0.030993351583887368, + "loss": 0.2775716483592987, + "loss_ce": 0.01194664929062128, + "loss_iou": 0.375, + "loss_num": 0.05322265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 30747840, + "step": 317 + }, + { + "epoch": 0.031091122409073132, + "grad_norm": 5.475279344642474, + "learning_rate": 5e-05, + "loss": 0.1597, + "num_input_tokens_seen": 30845668, + "step": 318 + }, + { + "epoch": 0.031091122409073132, + "loss": 0.15511652827262878, + "loss_ce": 0.01003597304224968, + "loss_iou": 0.5859375, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 30845668, + "step": 318 + }, + { + "epoch": 0.031188893234258896, + "grad_norm": 3.6038877922583015, + "learning_rate": 5e-05, + "loss": 0.1634, + "num_input_tokens_seen": 30941872, + "step": 319 + }, + { + "epoch": 0.031188893234258896, + "loss": 0.2287176549434662, + "loss_ce": 0.006893018260598183, + "loss_iou": 0.203125, + "loss_num": 0.04443359375, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 30941872, + "step": 319 + }, + { + "epoch": 0.03128666405944466, + "grad_norm": 20.312812219197635, + "learning_rate": 5e-05, + "loss": 0.1539, + "num_input_tokens_seen": 31038264, + "step": 320 + }, + { + "epoch": 0.03128666405944466, + "loss": 0.16370461881160736, + "loss_ce": 0.007668232079595327, + "loss_iou": 0.2734375, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 31038264, + "step": 320 + }, + { + "epoch": 0.03138443488463043, + "grad_norm": 7.314357201956468, + "learning_rate": 5e-05, + "loss": 0.2534, + "num_input_tokens_seen": 31134836, + "step": 321 + }, + { + "epoch": 0.03138443488463043, + "loss": 0.2412761002779007, + "loss_ce": 0.0071452343836426735, + "loss_iou": 0.4375, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 31134836, + "step": 321 + }, + { + "epoch": 0.03148220570981619, + "grad_norm": 3.191416375692913, + "learning_rate": 5e-05, + "loss": 0.1438, + "num_input_tokens_seen": 31232336, + "step": 322 + }, + { + "epoch": 0.03148220570981619, + "loss": 0.08646965026855469, + "loss_ce": 0.004011153243482113, + "loss_iou": 0.5234375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 31232336, + "step": 322 + }, + { + "epoch": 0.031579976535001955, + "grad_norm": 3.8260379833935505, + "learning_rate": 5e-05, + "loss": 0.2327, + "num_input_tokens_seen": 31329332, + "step": 323 + }, + { + "epoch": 0.031579976535001955, + "loss": 0.17138229310512543, + "loss_ce": 0.006175371818244457, + "loss_iou": 0.30078125, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 31329332, + "step": 323 + }, + { + "epoch": 0.03167774736018772, + "grad_norm": 4.391946302631016, + "learning_rate": 5e-05, + "loss": 0.2264, + "num_input_tokens_seen": 31426356, + "step": 324 + }, + { + "epoch": 0.03167774736018772, + "loss": 0.2262953370809555, + "loss_ce": 0.007240166421979666, + "loss_iou": 0.216796875, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 31426356, + "step": 324 + }, + { + "epoch": 0.03177551818537348, + "grad_norm": 3.5749722849496446, + "learning_rate": 5e-05, + "loss": 0.1522, + "num_input_tokens_seen": 31523396, + "step": 325 + }, + { + "epoch": 0.03177551818537348, + "loss": 0.18253609538078308, + "loss_ce": 0.005595170892775059, + "loss_iou": 0.4296875, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 31523396, + "step": 325 + }, + { + "epoch": 0.03187328901055925, + "grad_norm": 8.11870591480968, + "learning_rate": 5e-05, + "loss": 0.1701, + "num_input_tokens_seen": 31620224, + "step": 326 + }, + { + "epoch": 0.03187328901055925, + "loss": 0.1979908049106598, + "loss_ce": 0.008476657792925835, + "loss_iou": 0.26953125, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 31620224, + "step": 326 + }, + { + "epoch": 0.03197105983574501, + "grad_norm": 4.738920322036286, + "learning_rate": 5e-05, + "loss": 0.2087, + "num_input_tokens_seen": 31717368, + "step": 327 + }, + { + "epoch": 0.03197105983574501, + "loss": 0.1788043975830078, + "loss_ce": 0.008516302332282066, + "loss_iou": 0.3515625, + "loss_num": 0.0341796875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 31717368, + "step": 327 + }, + { + "epoch": 0.032068830660930775, + "grad_norm": 16.12975090924684, + "learning_rate": 5e-05, + "loss": 0.1604, + "num_input_tokens_seen": 31814808, + "step": 328 + }, + { + "epoch": 0.032068830660930775, + "loss": 0.1528216451406479, + "loss_ce": 0.012135598808526993, + "loss_iou": 0.421875, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 31814808, + "step": 328 + }, + { + "epoch": 0.032166601486116546, + "grad_norm": 4.791367612897675, + "learning_rate": 5e-05, + "loss": 0.1848, + "num_input_tokens_seen": 31911140, + "step": 329 + }, + { + "epoch": 0.032166601486116546, + "loss": 0.21234595775604248, + "loss_ce": 0.006138673983514309, + "loss_iou": 0.314453125, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 31911140, + "step": 329 + }, + { + "epoch": 0.03226437231130231, + "grad_norm": 30.783687181721653, + "learning_rate": 5e-05, + "loss": 0.158, + "num_input_tokens_seen": 32008656, + "step": 330 + }, + { + "epoch": 0.03226437231130231, + "loss": 0.16899831593036652, + "loss_ce": 0.0030742473900318146, + "loss_iou": 0.369140625, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 32008656, + "step": 330 + }, + { + "epoch": 0.032362143136488074, + "grad_norm": 3.762143285801564, + "learning_rate": 5e-05, + "loss": 0.1982, + "num_input_tokens_seen": 32105496, + "step": 331 + }, + { + "epoch": 0.032362143136488074, + "loss": 0.2217290699481964, + "loss_ce": 0.006458076648414135, + "loss_iou": 0.376953125, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 32105496, + "step": 331 + }, + { + "epoch": 0.03245991396167384, + "grad_norm": 5.464062965555648, + "learning_rate": 5e-05, + "loss": 0.2148, + "num_input_tokens_seen": 32203332, + "step": 332 + }, + { + "epoch": 0.03245991396167384, + "loss": 0.18748286366462708, + "loss_ce": 0.008222604170441628, + "loss_iou": 0.291015625, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 32203332, + "step": 332 + }, + { + "epoch": 0.0325576847868596, + "grad_norm": 12.440776922743323, + "learning_rate": 5e-05, + "loss": 0.1902, + "num_input_tokens_seen": 32300684, + "step": 333 + }, + { + "epoch": 0.0325576847868596, + "loss": 0.16000425815582275, + "loss_ce": 0.007263787090778351, + "loss_iou": 0.48046875, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 32300684, + "step": 333 + }, + { + "epoch": 0.032655455612045366, + "grad_norm": 5.588883915435599, + "learning_rate": 5e-05, + "loss": 0.2377, + "num_input_tokens_seen": 32397992, + "step": 334 + }, + { + "epoch": 0.032655455612045366, + "loss": 0.2634441554546356, + "loss_ce": 0.009782053530216217, + "loss_iou": 0.298828125, + "loss_num": 0.05078125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 32397992, + "step": 334 + }, + { + "epoch": 0.03275322643723113, + "grad_norm": 6.130358640982554, + "learning_rate": 5e-05, + "loss": 0.1582, + "num_input_tokens_seen": 32494964, + "step": 335 + }, + { + "epoch": 0.03275322643723113, + "loss": 0.14217978715896606, + "loss_ce": 0.004698100965470076, + "loss_iou": 0.494140625, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 32494964, + "step": 335 + }, + { + "epoch": 0.032850997262416894, + "grad_norm": 15.801292645221718, + "learning_rate": 5e-05, + "loss": 0.154, + "num_input_tokens_seen": 32591984, + "step": 336 + }, + { + "epoch": 0.032850997262416894, + "loss": 0.13688498735427856, + "loss_ce": 0.006361317355185747, + "loss_iou": 0.3359375, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 32591984, + "step": 336 + }, + { + "epoch": 0.03294876808760266, + "grad_norm": 3.412621435649311, + "learning_rate": 5e-05, + "loss": 0.1677, + "num_input_tokens_seen": 32689392, + "step": 337 + }, + { + "epoch": 0.03294876808760266, + "loss": 0.14598047733306885, + "loss_ce": 0.004836682230234146, + "loss_iou": 0.4296875, + "loss_num": 0.0281982421875, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 32689392, + "step": 337 + }, + { + "epoch": 0.03304653891278842, + "grad_norm": 5.025882521097986, + "learning_rate": 5e-05, + "loss": 0.1967, + "num_input_tokens_seen": 32786284, + "step": 338 + }, + { + "epoch": 0.03304653891278842, + "loss": 0.18051043152809143, + "loss_ce": 0.007170583121478558, + "loss_iou": 0.5078125, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 32786284, + "step": 338 + }, + { + "epoch": 0.033144309737974186, + "grad_norm": 3.463333975355933, + "learning_rate": 5e-05, + "loss": 0.1266, + "num_input_tokens_seen": 32883232, + "step": 339 + }, + { + "epoch": 0.033144309737974186, + "loss": 0.1263369917869568, + "loss_ce": 0.012384353205561638, + "loss_iou": 0.404296875, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 32883232, + "step": 339 + }, + { + "epoch": 0.033242080563159956, + "grad_norm": 4.023147522713202, + "learning_rate": 5e-05, + "loss": 0.1196, + "num_input_tokens_seen": 32980164, + "step": 340 + }, + { + "epoch": 0.033242080563159956, + "loss": 0.1150117814540863, + "loss_ce": 0.016745179891586304, + "loss_iou": 0.435546875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 32980164, + "step": 340 + }, + { + "epoch": 0.03333985138834572, + "grad_norm": 5.809450885801215, + "learning_rate": 5e-05, + "loss": 0.2021, + "num_input_tokens_seen": 33077312, + "step": 341 + }, + { + "epoch": 0.03333985138834572, + "loss": 0.23293447494506836, + "loss_ce": 0.005334371235221624, + "loss_iou": 0.376953125, + "loss_num": 0.04541015625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 33077312, + "step": 341 + }, + { + "epoch": 0.033437622213531484, + "grad_norm": 15.936850873234617, + "learning_rate": 5e-05, + "loss": 0.1251, + "num_input_tokens_seen": 33174800, + "step": 342 + }, + { + "epoch": 0.033437622213531484, + "loss": 0.15278702974319458, + "loss_ce": 0.006211102940142155, + "loss_iou": 0.392578125, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 33174800, + "step": 342 + }, + { + "epoch": 0.03353539303871725, + "grad_norm": 5.790493957700404, + "learning_rate": 5e-05, + "loss": 0.2174, + "num_input_tokens_seen": 33271708, + "step": 343 + }, + { + "epoch": 0.03353539303871725, + "loss": 0.21929121017456055, + "loss_ce": 0.00579023314639926, + "loss_iou": 0.333984375, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 33271708, + "step": 343 + }, + { + "epoch": 0.03363316386390301, + "grad_norm": 5.872130480525834, + "learning_rate": 5e-05, + "loss": 0.1924, + "num_input_tokens_seen": 33368836, + "step": 344 + }, + { + "epoch": 0.03363316386390301, + "loss": 0.16697782278060913, + "loss_ce": 0.007370887324213982, + "loss_iou": 0.404296875, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 33368836, + "step": 344 + }, + { + "epoch": 0.033730934689088776, + "grad_norm": 3.6195615939736876, + "learning_rate": 5e-05, + "loss": 0.2004, + "num_input_tokens_seen": 33465812, + "step": 345 + }, + { + "epoch": 0.033730934689088776, + "loss": 0.21867378056049347, + "loss_ce": 0.004623482935130596, + "loss_iou": 0.41796875, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 33465812, + "step": 345 + }, + { + "epoch": 0.03382870551427454, + "grad_norm": 4.376057041218454, + "learning_rate": 5e-05, + "loss": 0.1998, + "num_input_tokens_seen": 33563268, + "step": 346 + }, + { + "epoch": 0.03382870551427454, + "loss": 0.22111165523529053, + "loss_ce": 0.009258627891540527, + "loss_iou": 0.427734375, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 33563268, + "step": 346 + }, + { + "epoch": 0.033926476339460304, + "grad_norm": 6.28259152379781, + "learning_rate": 5e-05, + "loss": 0.1988, + "num_input_tokens_seen": 33659980, + "step": 347 + }, + { + "epoch": 0.033926476339460304, + "loss": 0.2175336480140686, + "loss_ce": 0.00650458550080657, + "loss_iou": 0.3359375, + "loss_num": 0.042236328125, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 33659980, + "step": 347 + }, + { + "epoch": 0.03402424716464607, + "grad_norm": 7.534513444598333, + "learning_rate": 5e-05, + "loss": 0.2604, + "num_input_tokens_seen": 33755872, + "step": 348 + }, + { + "epoch": 0.03402424716464607, + "loss": 0.2507891356945038, + "loss_ce": 0.011134600266814232, + "loss_iou": 0.181640625, + "loss_num": 0.0478515625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 33755872, + "step": 348 + }, + { + "epoch": 0.03412201798983183, + "grad_norm": 4.958425421674483, + "learning_rate": 5e-05, + "loss": 0.1283, + "num_input_tokens_seen": 33851964, + "step": 349 + }, + { + "epoch": 0.03412201798983183, + "loss": 0.1564246118068695, + "loss_ce": 0.005728810094296932, + "loss_iou": 0.302734375, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 33851964, + "step": 349 + }, + { + "epoch": 0.034219788815017596, + "grad_norm": 7.217830263049113, + "learning_rate": 5e-05, + "loss": 0.1003, + "num_input_tokens_seen": 33949312, + "step": 350 + }, + { + "epoch": 0.034219788815017596, + "loss": 0.11062973737716675, + "loss_ce": 0.008502653799951077, + "loss_iou": 0.37109375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 33949312, + "step": 350 + }, + { + "epoch": 0.03431755964020336, + "grad_norm": 3.2475596459319114, + "learning_rate": 5e-05, + "loss": 0.1305, + "num_input_tokens_seen": 34046684, + "step": 351 + }, + { + "epoch": 0.03431755964020336, + "loss": 0.14061084389686584, + "loss_ce": 0.0070964498445391655, + "loss_iou": 0.357421875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 34046684, + "step": 351 + }, + { + "epoch": 0.03441533046538913, + "grad_norm": 3.2695177933388315, + "learning_rate": 5e-05, + "loss": 0.2, + "num_input_tokens_seen": 34143592, + "step": 352 + }, + { + "epoch": 0.03441533046538913, + "loss": 0.26480939984321594, + "loss_ce": 0.006813789252191782, + "loss_iou": 0.546875, + "loss_num": 0.051513671875, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 34143592, + "step": 352 + }, + { + "epoch": 0.034513101290574895, + "grad_norm": 4.500216940765318, + "learning_rate": 5e-05, + "loss": 0.1563, + "num_input_tokens_seen": 34241020, + "step": 353 + }, + { + "epoch": 0.034513101290574895, + "loss": 0.16896812617778778, + "loss_ce": 0.006584082264453173, + "loss_iou": 0.37890625, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 34241020, + "step": 353 + }, + { + "epoch": 0.03461087211576066, + "grad_norm": 5.364227776313456, + "learning_rate": 5e-05, + "loss": 0.2066, + "num_input_tokens_seen": 34337780, + "step": 354 + }, + { + "epoch": 0.03461087211576066, + "loss": 0.17765089869499207, + "loss_ce": 0.008034199476242065, + "loss_iou": 0.46875, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 34337780, + "step": 354 + }, + { + "epoch": 0.03470864294094642, + "grad_norm": 2.7776346036695188, + "learning_rate": 5e-05, + "loss": 0.1526, + "num_input_tokens_seen": 34433884, + "step": 355 + }, + { + "epoch": 0.03470864294094642, + "loss": 0.13366876542568207, + "loss_ce": 0.005037168972194195, + "loss_iou": 0.33984375, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 34433884, + "step": 355 + }, + { + "epoch": 0.03480641376613219, + "grad_norm": 29.448102304726998, + "learning_rate": 5e-05, + "loss": 0.1631, + "num_input_tokens_seen": 34532144, + "step": 356 + }, + { + "epoch": 0.03480641376613219, + "loss": 0.15827792882919312, + "loss_ce": 0.008802834898233414, + "loss_iou": 0.4296875, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 34532144, + "step": 356 + }, + { + "epoch": 0.03490418459131795, + "grad_norm": 4.917464695200912, + "learning_rate": 5e-05, + "loss": 0.1803, + "num_input_tokens_seen": 34627876, + "step": 357 + }, + { + "epoch": 0.03490418459131795, + "loss": 0.10692852735519409, + "loss_ce": 0.013910949230194092, + "loss_iou": 0.318359375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 34627876, + "step": 357 + }, + { + "epoch": 0.035001955416503715, + "grad_norm": 6.721098184695476, + "learning_rate": 5e-05, + "loss": 0.2145, + "num_input_tokens_seen": 34724944, + "step": 358 + }, + { + "epoch": 0.035001955416503715, + "loss": 0.19568848609924316, + "loss_ce": 0.015390636399388313, + "loss_iou": 0.408203125, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 34724944, + "step": 358 + }, + { + "epoch": 0.03509972624168948, + "grad_norm": 15.624079099365906, + "learning_rate": 5e-05, + "loss": 0.2794, + "num_input_tokens_seen": 34821488, + "step": 359 + }, + { + "epoch": 0.03509972624168948, + "loss": 0.27047908306121826, + "loss_ce": 0.009309657849371433, + "loss_iou": 0.337890625, + "loss_num": 0.05224609375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 34821488, + "step": 359 + }, + { + "epoch": 0.03519749706687524, + "grad_norm": 9.534964535063358, + "learning_rate": 5e-05, + "loss": 0.2097, + "num_input_tokens_seen": 34917868, + "step": 360 + }, + { + "epoch": 0.03519749706687524, + "loss": 0.19575881958007812, + "loss_ce": 0.006641392596065998, + "loss_iou": 0.162109375, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 34917868, + "step": 360 + }, + { + "epoch": 0.03529526789206101, + "grad_norm": 8.067608702259086, + "learning_rate": 5e-05, + "loss": 0.1617, + "num_input_tokens_seen": 35014572, + "step": 361 + }, + { + "epoch": 0.03529526789206101, + "loss": 0.1662878841161728, + "loss_ce": 0.008512002415955067, + "loss_iou": 0.40234375, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 35014572, + "step": 361 + }, + { + "epoch": 0.03539303871724677, + "grad_norm": 3.1379797414837, + "learning_rate": 5e-05, + "loss": 0.1717, + "num_input_tokens_seen": 35111840, + "step": 362 + }, + { + "epoch": 0.03539303871724677, + "loss": 0.19845931231975555, + "loss_ce": 0.016635596752166748, + "loss_iou": 0.462890625, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 35111840, + "step": 362 + }, + { + "epoch": 0.03549080954243254, + "grad_norm": 2.6355532097158223, + "learning_rate": 5e-05, + "loss": 0.1575, + "num_input_tokens_seen": 35208220, + "step": 363 + }, + { + "epoch": 0.03549080954243254, + "loss": 0.09119050204753876, + "loss_ce": 0.009708564728498459, + "loss_iou": 0.2333984375, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 35208220, + "step": 363 + }, + { + "epoch": 0.035588580367618305, + "grad_norm": 4.347384987651324, + "learning_rate": 5e-05, + "loss": 0.1318, + "num_input_tokens_seen": 35304988, + "step": 364 + }, + { + "epoch": 0.035588580367618305, + "loss": 0.13263113796710968, + "loss_ce": 0.005922159180045128, + "loss_iou": 0.302734375, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 35304988, + "step": 364 + }, + { + "epoch": 0.03568635119280407, + "grad_norm": 2.166947049830192, + "learning_rate": 5e-05, + "loss": 0.1735, + "num_input_tokens_seen": 35401640, + "step": 365 + }, + { + "epoch": 0.03568635119280407, + "loss": 0.18686673045158386, + "loss_ce": 0.006935088895261288, + "loss_iou": 0.298828125, + "loss_num": 0.0361328125, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 35401640, + "step": 365 + }, + { + "epoch": 0.03578412201798983, + "grad_norm": 2.4369460425258964, + "learning_rate": 5e-05, + "loss": 0.1472, + "num_input_tokens_seen": 35497784, + "step": 366 + }, + { + "epoch": 0.03578412201798983, + "loss": 0.11426910758018494, + "loss_ce": 0.005016180220991373, + "loss_iou": 0.310546875, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 35497784, + "step": 366 + }, + { + "epoch": 0.0358818928431756, + "grad_norm": 3.9346720444691017, + "learning_rate": 5e-05, + "loss": 0.1766, + "num_input_tokens_seen": 35594692, + "step": 367 + }, + { + "epoch": 0.0358818928431756, + "loss": 0.18981972336769104, + "loss_ce": 0.0035404320806264877, + "loss_iou": 0.353515625, + "loss_num": 0.037109375, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 35594692, + "step": 367 + }, + { + "epoch": 0.03597966366836136, + "grad_norm": 6.641039616218235, + "learning_rate": 5e-05, + "loss": 0.21, + "num_input_tokens_seen": 35691628, + "step": 368 + }, + { + "epoch": 0.03597966366836136, + "loss": 0.23153077065944672, + "loss_ce": 0.012109391391277313, + "loss_iou": 0.388671875, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 35691628, + "step": 368 + }, + { + "epoch": 0.036077434493547125, + "grad_norm": 4.238890951631867, + "learning_rate": 5e-05, + "loss": 0.162, + "num_input_tokens_seen": 35788780, + "step": 369 + }, + { + "epoch": 0.036077434493547125, + "loss": 0.14213895797729492, + "loss_ce": 0.007831096649169922, + "loss_iou": 0.240234375, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 35788780, + "step": 369 + }, + { + "epoch": 0.03617520531873289, + "grad_norm": 2.831397176234303, + "learning_rate": 5e-05, + "loss": 0.1255, + "num_input_tokens_seen": 35885232, + "step": 370 + }, + { + "epoch": 0.03617520531873289, + "loss": 0.1230609118938446, + "loss_ce": 0.011915897950530052, + "loss_iou": 0.384765625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 35885232, + "step": 370 + }, + { + "epoch": 0.03627297614391865, + "grad_norm": 5.517975400330322, + "learning_rate": 5e-05, + "loss": 0.1551, + "num_input_tokens_seen": 35981912, + "step": 371 + }, + { + "epoch": 0.03627297614391865, + "loss": 0.15046143531799316, + "loss_ce": 0.005686035845428705, + "loss_iou": 0.4296875, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 35981912, + "step": 371 + }, + { + "epoch": 0.03637074696910442, + "grad_norm": 4.121447043149291, + "learning_rate": 5e-05, + "loss": 0.1671, + "num_input_tokens_seen": 36078168, + "step": 372 + }, + { + "epoch": 0.03637074696910442, + "loss": 0.1504410207271576, + "loss_ce": 0.005024760030210018, + "loss_iou": 0.5078125, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 36078168, + "step": 372 + }, + { + "epoch": 0.03646851779429018, + "grad_norm": 4.367578305482854, + "learning_rate": 5e-05, + "loss": 0.2053, + "num_input_tokens_seen": 36174776, + "step": 373 + }, + { + "epoch": 0.03646851779429018, + "loss": 0.24464990198612213, + "loss_ce": 0.010396968573331833, + "loss_iou": 0.474609375, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 36174776, + "step": 373 + }, + { + "epoch": 0.036566288619475945, + "grad_norm": 4.25490512825702, + "learning_rate": 5e-05, + "loss": 0.2195, + "num_input_tokens_seen": 36272044, + "step": 374 + }, + { + "epoch": 0.036566288619475945, + "loss": 0.1524159014225006, + "loss_ce": 0.0022694210056215525, + "loss_iou": 0.45703125, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 36272044, + "step": 374 + }, + { + "epoch": 0.036664059444661716, + "grad_norm": 11.040062512663647, + "learning_rate": 5e-05, + "loss": 0.1314, + "num_input_tokens_seen": 36368356, + "step": 375 + }, + { + "epoch": 0.036664059444661716, + "loss": 0.10400186479091644, + "loss_ce": 0.007078031077980995, + "loss_iou": 0.314453125, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 36368356, + "step": 375 + }, + { + "epoch": 0.03676183026984748, + "grad_norm": 3.514007754870044, + "learning_rate": 5e-05, + "loss": 0.123, + "num_input_tokens_seen": 36465824, + "step": 376 + }, + { + "epoch": 0.03676183026984748, + "loss": 0.1382519155740738, + "loss_ce": 0.003486289642751217, + "loss_iou": 0.54296875, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 36465824, + "step": 376 + }, + { + "epoch": 0.036859601095033244, + "grad_norm": 4.7425443305239305, + "learning_rate": 5e-05, + "loss": 0.1739, + "num_input_tokens_seen": 36562260, + "step": 377 + }, + { + "epoch": 0.036859601095033244, + "loss": 0.1697670817375183, + "loss_ce": 0.005277327261865139, + "loss_iou": 0.478515625, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 36562260, + "step": 377 + }, + { + "epoch": 0.03695737192021901, + "grad_norm": 9.611302702097513, + "learning_rate": 5e-05, + "loss": 0.2281, + "num_input_tokens_seen": 36660628, + "step": 378 + }, + { + "epoch": 0.03695737192021901, + "loss": 0.2803610861301422, + "loss_ce": 0.012050527147948742, + "loss_iou": 0.53515625, + "loss_num": 0.0537109375, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 36660628, + "step": 378 + }, + { + "epoch": 0.03705514274540477, + "grad_norm": 3.807426123950739, + "learning_rate": 5e-05, + "loss": 0.1409, + "num_input_tokens_seen": 36757360, + "step": 379 + }, + { + "epoch": 0.03705514274540477, + "loss": 0.171976700425148, + "loss_ce": 0.009623182006180286, + "loss_iou": 0.32421875, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 36757360, + "step": 379 + }, + { + "epoch": 0.037152913570590536, + "grad_norm": 11.950298862872328, + "learning_rate": 5e-05, + "loss": 0.1009, + "num_input_tokens_seen": 36853760, + "step": 380 + }, + { + "epoch": 0.037152913570590536, + "loss": 0.07795757800340652, + "loss_ce": 0.0106663154438138, + "loss_iou": 0.2392578125, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 36853760, + "step": 380 + }, + { + "epoch": 0.0372506843957763, + "grad_norm": 10.138166415900114, + "learning_rate": 5e-05, + "loss": 0.1998, + "num_input_tokens_seen": 36950112, + "step": 381 + }, + { + "epoch": 0.0372506843957763, + "loss": 0.16908186674118042, + "loss_ce": 0.011305982246994972, + "loss_iou": 0.58984375, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 36950112, + "step": 381 + }, + { + "epoch": 0.037348455220962064, + "grad_norm": 3.935873693632544, + "learning_rate": 5e-05, + "loss": 0.2045, + "num_input_tokens_seen": 37047600, + "step": 382 + }, + { + "epoch": 0.037348455220962064, + "loss": 0.2676904797554016, + "loss_ce": 0.006704151164740324, + "loss_iou": 0.33203125, + "loss_num": 0.05224609375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 37047600, + "step": 382 + }, + { + "epoch": 0.03744622604614783, + "grad_norm": 9.487191562229055, + "learning_rate": 5e-05, + "loss": 0.1889, + "num_input_tokens_seen": 37144708, + "step": 383 + }, + { + "epoch": 0.03744622604614783, + "loss": 0.14789384603500366, + "loss_ce": 0.00824541412293911, + "loss_iou": 0.28515625, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 37144708, + "step": 383 + }, + { + "epoch": 0.03754399687133359, + "grad_norm": 13.523179309397577, + "learning_rate": 5e-05, + "loss": 0.1144, + "num_input_tokens_seen": 37241560, + "step": 384 + }, + { + "epoch": 0.03754399687133359, + "loss": 0.1349947154521942, + "loss_ce": 0.0063631245866417885, + "loss_iou": 0.412109375, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 37241560, + "step": 384 + }, + { + "epoch": 0.037641767696519356, + "grad_norm": 3.7650943930405023, + "learning_rate": 5e-05, + "loss": 0.1396, + "num_input_tokens_seen": 37338556, + "step": 385 + }, + { + "epoch": 0.037641767696519356, + "loss": 0.13236039876937866, + "loss_ce": 0.003850871231406927, + "loss_iou": 0.3203125, + "loss_num": 0.025634765625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 37338556, + "step": 385 + }, + { + "epoch": 0.03773953852170513, + "grad_norm": 6.221287636366194, + "learning_rate": 5e-05, + "loss": 0.2132, + "num_input_tokens_seen": 37435648, + "step": 386 + }, + { + "epoch": 0.03773953852170513, + "loss": 0.22144430875778198, + "loss_ce": 0.009988013654947281, + "loss_iou": 0.42578125, + "loss_num": 0.042236328125, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 37435648, + "step": 386 + }, + { + "epoch": 0.03783730934689089, + "grad_norm": 4.233054770802368, + "learning_rate": 5e-05, + "loss": 0.1622, + "num_input_tokens_seen": 37533020, + "step": 387 + }, + { + "epoch": 0.03783730934689089, + "loss": 0.1752602756023407, + "loss_ce": 0.0094277523458004, + "loss_iou": 0.28515625, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 37533020, + "step": 387 + }, + { + "epoch": 0.037935080172076655, + "grad_norm": 27.36698105889913, + "learning_rate": 5e-05, + "loss": 0.1202, + "num_input_tokens_seen": 37629852, + "step": 388 + }, + { + "epoch": 0.037935080172076655, + "loss": 0.10585843771696091, + "loss_ce": 0.005905740894377232, + "loss_iou": 0.2451171875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 37629852, + "step": 388 + }, + { + "epoch": 0.03803285099726242, + "grad_norm": 4.5794177540049334, + "learning_rate": 5e-05, + "loss": 0.1266, + "num_input_tokens_seen": 37727600, + "step": 389 + }, + { + "epoch": 0.03803285099726242, + "loss": 0.1398848295211792, + "loss_ce": 0.00655351672321558, + "loss_iou": 0.337890625, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 37727600, + "step": 389 + }, + { + "epoch": 0.03813062182244818, + "grad_norm": 4.725152976369543, + "learning_rate": 5e-05, + "loss": 0.1646, + "num_input_tokens_seen": 37823992, + "step": 390 + }, + { + "epoch": 0.03813062182244818, + "loss": 0.2102859914302826, + "loss_ce": 0.01054846029728651, + "loss_iou": 0.376953125, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 37823992, + "step": 390 + }, + { + "epoch": 0.038228392647633946, + "grad_norm": 2.9274898277405907, + "learning_rate": 5e-05, + "loss": 0.1938, + "num_input_tokens_seen": 37920640, + "step": 391 + }, + { + "epoch": 0.038228392647633946, + "loss": 0.221269890666008, + "loss_ce": 0.008745476603507996, + "loss_iou": 0.40234375, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 37920640, + "step": 391 + }, + { + "epoch": 0.03832616347281971, + "grad_norm": 6.828196343738507, + "learning_rate": 5e-05, + "loss": 0.2297, + "num_input_tokens_seen": 38018252, + "step": 392 + }, + { + "epoch": 0.03832616347281971, + "loss": 0.2194662243127823, + "loss_ce": 0.008162526413798332, + "loss_iou": 0.3125, + "loss_num": 0.042236328125, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 38018252, + "step": 392 + }, + { + "epoch": 0.038423934298005474, + "grad_norm": 4.238289580879835, + "learning_rate": 5e-05, + "loss": 0.1863, + "num_input_tokens_seen": 38114932, + "step": 393 + }, + { + "epoch": 0.038423934298005474, + "loss": 0.1376625895500183, + "loss_ce": 0.009061522781848907, + "loss_iou": 0.2578125, + "loss_num": 0.025634765625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 38114932, + "step": 393 + }, + { + "epoch": 0.03852170512319124, + "grad_norm": 7.124751505590493, + "learning_rate": 5e-05, + "loss": 0.1818, + "num_input_tokens_seen": 38211808, + "step": 394 + }, + { + "epoch": 0.03852170512319124, + "loss": 0.17831411957740784, + "loss_ce": 0.013305585831403732, + "loss_iou": 0.490234375, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 38211808, + "step": 394 + }, + { + "epoch": 0.038619475948377, + "grad_norm": 4.759845386675514, + "learning_rate": 5e-05, + "loss": 0.1553, + "num_input_tokens_seen": 38309248, + "step": 395 + }, + { + "epoch": 0.038619475948377, + "loss": 0.19856128096580505, + "loss_ce": 0.00684985239058733, + "loss_iou": 0.328125, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 38309248, + "step": 395 + }, + { + "epoch": 0.038717246773562766, + "grad_norm": 5.786820922968136, + "learning_rate": 5e-05, + "loss": 0.1932, + "num_input_tokens_seen": 38405704, + "step": 396 + }, + { + "epoch": 0.038717246773562766, + "loss": 0.16011154651641846, + "loss_ce": 0.008210314437747002, + "loss_iou": 0.29296875, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 38405704, + "step": 396 + }, + { + "epoch": 0.03881501759874853, + "grad_norm": 3.919519764124308, + "learning_rate": 5e-05, + "loss": 0.1892, + "num_input_tokens_seen": 38502960, + "step": 397 + }, + { + "epoch": 0.03881501759874853, + "loss": 0.22288280725479126, + "loss_ce": 0.005658687092363834, + "loss_iou": 0.30078125, + "loss_num": 0.04345703125, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 38502960, + "step": 397 + }, + { + "epoch": 0.0389127884239343, + "grad_norm": 7.0685266079149685, + "learning_rate": 5e-05, + "loss": 0.1325, + "num_input_tokens_seen": 38600340, + "step": 398 + }, + { + "epoch": 0.0389127884239343, + "loss": 0.09915027767419815, + "loss_ce": 0.00790271908044815, + "loss_iou": 0.443359375, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 38600340, + "step": 398 + }, + { + "epoch": 0.039010559249120065, + "grad_norm": 11.262027410805475, + "learning_rate": 5e-05, + "loss": 0.1842, + "num_input_tokens_seen": 38697572, + "step": 399 + }, + { + "epoch": 0.039010559249120065, + "loss": 0.17351675033569336, + "loss_ce": 0.007958884350955486, + "loss_iou": 0.4453125, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 38697572, + "step": 399 + }, + { + "epoch": 0.03910833007430583, + "grad_norm": 6.863215427848174, + "learning_rate": 5e-05, + "loss": 0.2668, + "num_input_tokens_seen": 38794432, + "step": 400 + }, + { + "epoch": 0.03910833007430583, + "loss": 0.2131958305835724, + "loss_ce": 0.008361855521798134, + "loss_iou": 0.41796875, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 38794432, + "step": 400 + }, + { + "epoch": 0.03920610089949159, + "grad_norm": 4.488074962184981, + "learning_rate": 5e-05, + "loss": 0.2172, + "num_input_tokens_seen": 38891868, + "step": 401 + }, + { + "epoch": 0.03920610089949159, + "loss": 0.17075473070144653, + "loss_ce": 0.008126560598611832, + "loss_iou": 0.396484375, + "loss_num": 0.032470703125, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 38891868, + "step": 401 + }, + { + "epoch": 0.03930387172467736, + "grad_norm": 3.2662258665117365, + "learning_rate": 5e-05, + "loss": 0.25, + "num_input_tokens_seen": 38989424, + "step": 402 + }, + { + "epoch": 0.03930387172467736, + "loss": 0.24500223994255066, + "loss_ce": 0.00971170887351036, + "loss_iou": 0.265625, + "loss_num": 0.047119140625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 38989424, + "step": 402 + }, + { + "epoch": 0.03940164254986312, + "grad_norm": 3.5968015502376627, + "learning_rate": 5e-05, + "loss": 0.2477, + "num_input_tokens_seen": 39086984, + "step": 403 + }, + { + "epoch": 0.03940164254986312, + "loss": 0.18276919424533844, + "loss_ce": 0.00820864923298359, + "loss_iou": 0.380859375, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 39086984, + "step": 403 + }, + { + "epoch": 0.039499413375048885, + "grad_norm": 9.957252364803683, + "learning_rate": 5e-05, + "loss": 0.1756, + "num_input_tokens_seen": 39184268, + "step": 404 + }, + { + "epoch": 0.039499413375048885, + "loss": 0.18020841479301453, + "loss_ce": 0.005464766174554825, + "loss_iou": 0.37890625, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 39184268, + "step": 404 + }, + { + "epoch": 0.03959718420023465, + "grad_norm": 3.2235538552329652, + "learning_rate": 5e-05, + "loss": 0.1704, + "num_input_tokens_seen": 39280204, + "step": 405 + }, + { + "epoch": 0.03959718420023465, + "loss": 0.1717631220817566, + "loss_ce": 0.005411812104284763, + "loss_iou": 0.470703125, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 39280204, + "step": 405 + }, + { + "epoch": 0.03969495502542041, + "grad_norm": 5.994701151083211, + "learning_rate": 5e-05, + "loss": 0.1629, + "num_input_tokens_seen": 39377852, + "step": 406 + }, + { + "epoch": 0.03969495502542041, + "loss": 0.18669086694717407, + "loss_ce": 0.00959736667573452, + "loss_iou": 0.369140625, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 39377852, + "step": 406 + }, + { + "epoch": 0.03979272585060618, + "grad_norm": 4.359062722709186, + "learning_rate": 5e-05, + "loss": 0.2335, + "num_input_tokens_seen": 39475564, + "step": 407 + }, + { + "epoch": 0.03979272585060618, + "loss": 0.2520678639411926, + "loss_ce": 0.002800265559926629, + "loss_iou": 0.47265625, + "loss_num": 0.0498046875, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 39475564, + "step": 407 + }, + { + "epoch": 0.03989049667579194, + "grad_norm": 4.35344181963277, + "learning_rate": 5e-05, + "loss": 0.1869, + "num_input_tokens_seen": 39572860, + "step": 408 + }, + { + "epoch": 0.03989049667579194, + "loss": 0.1508096605539322, + "loss_ce": 0.008170497603714466, + "loss_iou": 0.30078125, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 39572860, + "step": 408 + }, + { + "epoch": 0.03998826750097771, + "grad_norm": 14.229426080201192, + "learning_rate": 5e-05, + "loss": 0.2108, + "num_input_tokens_seen": 39670092, + "step": 409 + }, + { + "epoch": 0.03998826750097771, + "loss": 0.19122366607189178, + "loss_ce": 0.01059011835604906, + "loss_iou": 0.34375, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 39670092, + "step": 409 + }, + { + "epoch": 0.040086038326163476, + "grad_norm": 23.129216190638232, + "learning_rate": 5e-05, + "loss": 0.1851, + "num_input_tokens_seen": 39767308, + "step": 410 + }, + { + "epoch": 0.040086038326163476, + "loss": 0.20718826353549957, + "loss_ce": 0.008213656954467297, + "loss_iou": 0.337890625, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 39767308, + "step": 410 + }, + { + "epoch": 0.04018380915134924, + "grad_norm": 5.107888188082573, + "learning_rate": 5e-05, + "loss": 0.1577, + "num_input_tokens_seen": 39864424, + "step": 411 + }, + { + "epoch": 0.04018380915134924, + "loss": 0.16855394840240479, + "loss_ce": 0.0025383338797837496, + "loss_iou": 0.439453125, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 39864424, + "step": 411 + }, + { + "epoch": 0.040281579976535004, + "grad_norm": 3.6549145267048955, + "learning_rate": 5e-05, + "loss": 0.1409, + "num_input_tokens_seen": 39961724, + "step": 412 + }, + { + "epoch": 0.040281579976535004, + "loss": 0.16865049302577972, + "loss_ce": 0.013949258252978325, + "loss_iou": 0.353515625, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 39961724, + "step": 412 + }, + { + "epoch": 0.04037935080172077, + "grad_norm": 14.531437901641636, + "learning_rate": 5e-05, + "loss": 0.172, + "num_input_tokens_seen": 40058304, + "step": 413 + }, + { + "epoch": 0.04037935080172077, + "loss": 0.18359380960464478, + "loss_ce": 0.004974424839019775, + "loss_iou": 0.302734375, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 40058304, + "step": 413 + }, + { + "epoch": 0.04047712162690653, + "grad_norm": 4.007323546689225, + "learning_rate": 5e-05, + "loss": 0.2234, + "num_input_tokens_seen": 40154924, + "step": 414 + }, + { + "epoch": 0.04047712162690653, + "loss": 0.24006669223308563, + "loss_ce": 0.008773962035775185, + "loss_iou": 0.365234375, + "loss_num": 0.046142578125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 40154924, + "step": 414 + }, + { + "epoch": 0.040574892452092295, + "grad_norm": 6.368767720703478, + "learning_rate": 5e-05, + "loss": 0.2073, + "num_input_tokens_seen": 40252316, + "step": 415 + }, + { + "epoch": 0.040574892452092295, + "loss": 0.2338756024837494, + "loss_ce": 0.00993761420249939, + "loss_iou": 0.470703125, + "loss_num": 0.044677734375, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 40252316, + "step": 415 + }, + { + "epoch": 0.04067266327727806, + "grad_norm": 4.173986824590205, + "learning_rate": 5e-05, + "loss": 0.209, + "num_input_tokens_seen": 40350468, + "step": 416 + }, + { + "epoch": 0.04067266327727806, + "loss": 0.24171555042266846, + "loss_ce": 0.009110569022595882, + "loss_iou": 0.43359375, + "loss_num": 0.04638671875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 40350468, + "step": 416 + }, + { + "epoch": 0.04077043410246382, + "grad_norm": 5.579660448479756, + "learning_rate": 5e-05, + "loss": 0.2107, + "num_input_tokens_seen": 40446552, + "step": 417 + }, + { + "epoch": 0.04077043410246382, + "loss": 0.24486371874809265, + "loss_ce": 0.005411354824900627, + "loss_iou": 0.30078125, + "loss_num": 0.0478515625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 40446552, + "step": 417 + }, + { + "epoch": 0.04086820492764959, + "grad_norm": 3.3896339955416583, + "learning_rate": 5e-05, + "loss": 0.1902, + "num_input_tokens_seen": 40543088, + "step": 418 + }, + { + "epoch": 0.04086820492764959, + "loss": 0.23187805712223053, + "loss_ce": 0.0045831347815692425, + "loss_iou": 0.25, + "loss_num": 0.04541015625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 40543088, + "step": 418 + }, + { + "epoch": 0.04096597575283535, + "grad_norm": 5.823031864081709, + "learning_rate": 5e-05, + "loss": 0.1991, + "num_input_tokens_seen": 40641072, + "step": 419 + }, + { + "epoch": 0.04096597575283535, + "loss": 0.261044442653656, + "loss_ce": 0.0122346431016922, + "loss_iou": 0.35546875, + "loss_num": 0.0498046875, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 40641072, + "step": 419 + }, + { + "epoch": 0.041063746578021115, + "grad_norm": 7.334026418784686, + "learning_rate": 5e-05, + "loss": 0.1809, + "num_input_tokens_seen": 40737824, + "step": 420 + }, + { + "epoch": 0.041063746578021115, + "loss": 0.2418113499879837, + "loss_ce": 0.007802543696016073, + "loss_iou": 0.296875, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 40737824, + "step": 420 + }, + { + "epoch": 0.041161517403206886, + "grad_norm": 70.89961668276868, + "learning_rate": 5e-05, + "loss": 0.214, + "num_input_tokens_seen": 40834588, + "step": 421 + }, + { + "epoch": 0.041161517403206886, + "loss": 0.19044449925422668, + "loss_ce": 0.007705247960984707, + "loss_iou": 0.34375, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 40834588, + "step": 421 + }, + { + "epoch": 0.04125928822839265, + "grad_norm": 7.57658127320866, + "learning_rate": 5e-05, + "loss": 0.153, + "num_input_tokens_seen": 40931956, + "step": 422 + }, + { + "epoch": 0.04125928822839265, + "loss": 0.14292488992214203, + "loss_ce": 0.00950203649699688, + "loss_iou": 0.2353515625, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 40931956, + "step": 422 + }, + { + "epoch": 0.041357059053578414, + "grad_norm": 3.2502872385745385, + "learning_rate": 5e-05, + "loss": 0.1432, + "num_input_tokens_seen": 41029180, + "step": 423 + }, + { + "epoch": 0.041357059053578414, + "loss": 0.1463804543018341, + "loss_ce": 0.007189787924289703, + "loss_iou": 0.5859375, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 41029180, + "step": 423 + }, + { + "epoch": 0.04145482987876418, + "grad_norm": 5.874897558051645, + "learning_rate": 5e-05, + "loss": 0.2029, + "num_input_tokens_seen": 41125176, + "step": 424 + }, + { + "epoch": 0.04145482987876418, + "loss": 0.13622204959392548, + "loss_ce": 0.005677868612110615, + "loss_iou": 0.30859375, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 41125176, + "step": 424 + }, + { + "epoch": 0.04155260070394994, + "grad_norm": 9.046176345307282, + "learning_rate": 5e-05, + "loss": 0.1084, + "num_input_tokens_seen": 41221836, + "step": 425 + }, + { + "epoch": 0.04155260070394994, + "loss": 0.11767017841339111, + "loss_ce": 0.0057164449244737625, + "loss_iou": 0.251953125, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 41221836, + "step": 425 + }, + { + "epoch": 0.041650371529135706, + "grad_norm": 4.6934483950447055, + "learning_rate": 5e-05, + "loss": 0.192, + "num_input_tokens_seen": 41318724, + "step": 426 + }, + { + "epoch": 0.041650371529135706, + "loss": 0.18911659717559814, + "loss_ce": 0.006438365206122398, + "loss_iou": 0.240234375, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 41318724, + "step": 426 + }, + { + "epoch": 0.04174814235432147, + "grad_norm": 10.138923228245096, + "learning_rate": 5e-05, + "loss": 0.1655, + "num_input_tokens_seen": 41415868, + "step": 427 + }, + { + "epoch": 0.04174814235432147, + "loss": 0.18189987540245056, + "loss_ce": 0.001968231052160263, + "loss_iou": 0.423828125, + "loss_num": 0.0361328125, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 41415868, + "step": 427 + }, + { + "epoch": 0.041845913179507234, + "grad_norm": 10.275294612749647, + "learning_rate": 5e-05, + "loss": 0.1789, + "num_input_tokens_seen": 41512736, + "step": 428 + }, + { + "epoch": 0.041845913179507234, + "loss": 0.15985386073589325, + "loss_ce": 0.005862165242433548, + "loss_iou": 0.3515625, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 41512736, + "step": 428 + }, + { + "epoch": 0.041943684004693, + "grad_norm": 4.409942093818965, + "learning_rate": 5e-05, + "loss": 0.1748, + "num_input_tokens_seen": 41609528, + "step": 429 + }, + { + "epoch": 0.041943684004693, + "loss": 0.2128026783466339, + "loss_ce": 0.005725645460188389, + "loss_iou": 0.306640625, + "loss_num": 0.04150390625, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 41609528, + "step": 429 + }, + { + "epoch": 0.04204145482987876, + "grad_norm": 8.584506272583976, + "learning_rate": 5e-05, + "loss": 0.1935, + "num_input_tokens_seen": 41706028, + "step": 430 + }, + { + "epoch": 0.04204145482987876, + "loss": 0.17351442575454712, + "loss_ce": 0.0030432315543293953, + "loss_iou": 0.3984375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 41706028, + "step": 430 + }, + { + "epoch": 0.042139225655064526, + "grad_norm": 9.689059812595271, + "learning_rate": 5e-05, + "loss": 0.1759, + "num_input_tokens_seen": 41802416, + "step": 431 + }, + { + "epoch": 0.042139225655064526, + "loss": 0.18650823831558228, + "loss_ce": 0.01591498591005802, + "loss_iou": 0.41015625, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 41802416, + "step": 431 + }, + { + "epoch": 0.0422369964802503, + "grad_norm": 40.02755788160547, + "learning_rate": 5e-05, + "loss": 0.6096, + "num_input_tokens_seen": 41898852, + "step": 432 + }, + { + "epoch": 0.0422369964802503, + "loss": 0.5815030932426453, + "loss_ce": 0.45070475339889526, + "loss_iou": 0.421875, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 41898852, + "step": 432 + }, + { + "epoch": 0.04233476730543606, + "grad_norm": 47.400966113900964, + "learning_rate": 5e-05, + "loss": 0.845, + "num_input_tokens_seen": 41995820, + "step": 433 + }, + { + "epoch": 0.04233476730543606, + "loss": 0.8952614068984985, + "loss_ce": 0.6963478326797485, + "loss_iou": 0.345703125, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 41995820, + "step": 433 + }, + { + "epoch": 0.042432538130621825, + "grad_norm": 52.27708288283881, + "learning_rate": 5e-05, + "loss": 1.5545, + "num_input_tokens_seen": 42092636, + "step": 434 + }, + { + "epoch": 0.042432538130621825, + "loss": 1.507116675376892, + "loss_ce": 1.391821265220642, + "loss_iou": 0.40625, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 42092636, + "step": 434 + }, + { + "epoch": 0.04253030895580759, + "grad_norm": 77.33024509617822, + "learning_rate": 5e-05, + "loss": 2.0064, + "num_input_tokens_seen": 42190180, + "step": 435 + }, + { + "epoch": 0.04253030895580759, + "loss": 2.026257038116455, + "loss_ce": 1.859325885772705, + "loss_iou": 0.349609375, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 42190180, + "step": 435 + }, + { + "epoch": 0.04262807978099335, + "grad_norm": 52.33083700594337, + "learning_rate": 5e-05, + "loss": 1.4039, + "num_input_tokens_seen": 42287336, + "step": 436 + }, + { + "epoch": 0.04262807978099335, + "loss": 1.4529378414154053, + "loss_ce": 1.1534383296966553, + "loss_iou": 0.357421875, + "loss_num": 0.06005859375, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 42287336, + "step": 436 + }, + { + "epoch": 0.04272585060617912, + "grad_norm": 21.847611638235136, + "learning_rate": 5e-05, + "loss": 0.9533, + "num_input_tokens_seen": 42384112, + "step": 437 + }, + { + "epoch": 0.04272585060617912, + "loss": 1.0280041694641113, + "loss_ce": 0.7755626440048218, + "loss_iou": 0.3203125, + "loss_num": 0.050537109375, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 42384112, + "step": 437 + }, + { + "epoch": 0.04282362143136488, + "grad_norm": 11.000384553141227, + "learning_rate": 5e-05, + "loss": 0.3277, + "num_input_tokens_seen": 42481428, + "step": 438 + }, + { + "epoch": 0.04282362143136488, + "loss": 0.3078995943069458, + "loss_ce": 0.1321183294057846, + "loss_iou": 0.388671875, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 42481428, + "step": 438 + }, + { + "epoch": 0.042921392256550644, + "grad_norm": 52.47714772085781, + "learning_rate": 5e-05, + "loss": 0.2308, + "num_input_tokens_seen": 42578296, + "step": 439 + }, + { + "epoch": 0.042921392256550644, + "loss": 0.2533125877380371, + "loss_ce": 0.0212569423019886, + "loss_iou": 0.267578125, + "loss_num": 0.04638671875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 42578296, + "step": 439 + }, + { + "epoch": 0.04301916308173641, + "grad_norm": 4.981512164314068, + "learning_rate": 5e-05, + "loss": 0.1816, + "num_input_tokens_seen": 42675484, + "step": 440 + }, + { + "epoch": 0.04301916308173641, + "loss": 0.1939343512058258, + "loss_ce": 0.014185830950737, + "loss_iou": 0.31640625, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 42675484, + "step": 440 + }, + { + "epoch": 0.04311693390692217, + "grad_norm": 8.635977345874574, + "learning_rate": 5e-05, + "loss": 0.1791, + "num_input_tokens_seen": 42772340, + "step": 441 + }, + { + "epoch": 0.04311693390692217, + "loss": 0.1378687620162964, + "loss_ce": 0.013570668175816536, + "loss_iou": 0.421875, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 42772340, + "step": 441 + }, + { + "epoch": 0.043214704732107936, + "grad_norm": 5.589053534211369, + "learning_rate": 5e-05, + "loss": 0.1947, + "num_input_tokens_seen": 42870232, + "step": 442 + }, + { + "epoch": 0.043214704732107936, + "loss": 0.19262585043907166, + "loss_ce": 0.010832643136382103, + "loss_iou": 0.4296875, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 42870232, + "step": 442 + }, + { + "epoch": 0.0433124755572937, + "grad_norm": 8.09677091213386, + "learning_rate": 5e-05, + "loss": 0.2037, + "num_input_tokens_seen": 42966512, + "step": 443 + }, + { + "epoch": 0.0433124755572937, + "loss": 0.21366482973098755, + "loss_ce": 0.01026516780257225, + "loss_iou": 0.326171875, + "loss_num": 0.04052734375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 42966512, + "step": 443 + }, + { + "epoch": 0.04341024638247947, + "grad_norm": 2.974653229199291, + "learning_rate": 5e-05, + "loss": 0.1589, + "num_input_tokens_seen": 43064276, + "step": 444 + }, + { + "epoch": 0.04341024638247947, + "loss": 0.16417774558067322, + "loss_ce": 0.00765307666733861, + "loss_iou": 0.435546875, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 43064276, + "step": 444 + }, + { + "epoch": 0.043508017207665235, + "grad_norm": 5.004244585018428, + "learning_rate": 5e-05, + "loss": 0.1648, + "num_input_tokens_seen": 43161508, + "step": 445 + }, + { + "epoch": 0.043508017207665235, + "loss": 0.2018072009086609, + "loss_ce": 0.007654364220798016, + "loss_iou": 0.478515625, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 43161508, + "step": 445 + }, + { + "epoch": 0.043605788032851, + "grad_norm": 6.842825547475833, + "learning_rate": 5e-05, + "loss": 0.1555, + "num_input_tokens_seen": 43258936, + "step": 446 + }, + { + "epoch": 0.043605788032851, + "loss": 0.14763513207435608, + "loss_ce": 0.008383422158658504, + "loss_iou": 0.341796875, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 43258936, + "step": 446 + }, + { + "epoch": 0.04370355885803676, + "grad_norm": 5.485261669752105, + "learning_rate": 5e-05, + "loss": 0.1501, + "num_input_tokens_seen": 43356604, + "step": 447 + }, + { + "epoch": 0.04370355885803676, + "loss": 0.15413641929626465, + "loss_ce": 0.014030229300260544, + "loss_iou": 0.33203125, + "loss_num": 0.028076171875, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 43356604, + "step": 447 + }, + { + "epoch": 0.04380132968322253, + "grad_norm": 1.7350851594143333, + "learning_rate": 5e-05, + "loss": 0.1137, + "num_input_tokens_seen": 43452900, + "step": 448 + }, + { + "epoch": 0.04380132968322253, + "loss": 0.08972691744565964, + "loss_ce": 0.009610641747713089, + "loss_iou": 0.228515625, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 43452900, + "step": 448 + }, + { + "epoch": 0.04389910050840829, + "grad_norm": 2.6015191332488365, + "learning_rate": 5e-05, + "loss": 0.1255, + "num_input_tokens_seen": 43550116, + "step": 449 + }, + { + "epoch": 0.04389910050840829, + "loss": 0.11265504360198975, + "loss_ce": 0.005431527737528086, + "loss_iou": 0.5, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 43550116, + "step": 449 + }, + { + "epoch": 0.043996871333594055, + "grad_norm": 3.842882191594252, + "learning_rate": 5e-05, + "loss": 0.1257, + "num_input_tokens_seen": 43647580, + "step": 450 + }, + { + "epoch": 0.043996871333594055, + "loss": 0.11023657023906708, + "loss_ce": 0.011023931205272675, + "loss_iou": 0.412109375, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 43647580, + "step": 450 + }, + { + "epoch": 0.04409464215877982, + "grad_norm": 5.738434101691167, + "learning_rate": 5e-05, + "loss": 0.127, + "num_input_tokens_seen": 43744380, + "step": 451 + }, + { + "epoch": 0.04409464215877982, + "loss": 0.14523077011108398, + "loss_ce": 0.006741986144334078, + "loss_iou": 0.30078125, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 43744380, + "step": 451 + }, + { + "epoch": 0.04419241298396558, + "grad_norm": 4.54474689268125, + "learning_rate": 5e-05, + "loss": 0.1334, + "num_input_tokens_seen": 43839996, + "step": 452 + }, + { + "epoch": 0.04419241298396558, + "loss": 0.12140330672264099, + "loss_ce": 0.014134016819298267, + "loss_iou": 0.345703125, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 43839996, + "step": 452 + }, + { + "epoch": 0.04429018380915135, + "grad_norm": 7.193384725705121, + "learning_rate": 5e-05, + "loss": 0.1435, + "num_input_tokens_seen": 43937032, + "step": 453 + }, + { + "epoch": 0.04429018380915135, + "loss": 0.14655381441116333, + "loss_ce": 0.01003342680633068, + "loss_iou": 0.408203125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 43937032, + "step": 453 + }, + { + "epoch": 0.04438795463433711, + "grad_norm": 5.066194051871663, + "learning_rate": 5e-05, + "loss": 0.2034, + "num_input_tokens_seen": 44034964, + "step": 454 + }, + { + "epoch": 0.04438795463433711, + "loss": 0.19936572015285492, + "loss_ce": 0.012659192085266113, + "loss_iou": 0.392578125, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 44034964, + "step": 454 + }, + { + "epoch": 0.04448572545952288, + "grad_norm": 6.045464987844919, + "learning_rate": 5e-05, + "loss": 0.1827, + "num_input_tokens_seen": 44131356, + "step": 455 + }, + { + "epoch": 0.04448572545952288, + "loss": 0.1632193624973297, + "loss_ce": 0.009593881666660309, + "loss_iou": 0.37109375, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 44131356, + "step": 455 + }, + { + "epoch": 0.044583496284708646, + "grad_norm": 48.571961208384685, + "learning_rate": 5e-05, + "loss": 0.1545, + "num_input_tokens_seen": 44228368, + "step": 456 + }, + { + "epoch": 0.044583496284708646, + "loss": 0.12177075445652008, + "loss_ce": 0.004888439085334539, + "loss_iou": 0.421875, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 44228368, + "step": 456 + }, + { + "epoch": 0.04468126710989441, + "grad_norm": 5.6355410594563855, + "learning_rate": 5e-05, + "loss": 0.1632, + "num_input_tokens_seen": 44325460, + "step": 457 + }, + { + "epoch": 0.04468126710989441, + "loss": 0.18856693804264069, + "loss_ce": 0.007460369728505611, + "loss_iou": 0.357421875, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 44325460, + "step": 457 + }, + { + "epoch": 0.044779037935080174, + "grad_norm": 10.181960788552477, + "learning_rate": 5e-05, + "loss": 0.1898, + "num_input_tokens_seen": 44422876, + "step": 458 + }, + { + "epoch": 0.044779037935080174, + "loss": 0.21236643195152283, + "loss_ce": 0.0099738584831357, + "loss_iou": 0.43359375, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 44422876, + "step": 458 + }, + { + "epoch": 0.04487680876026594, + "grad_norm": 3.3289656161129537, + "learning_rate": 5e-05, + "loss": 0.1244, + "num_input_tokens_seen": 44519916, + "step": 459 + }, + { + "epoch": 0.04487680876026594, + "loss": 0.10086624324321747, + "loss_ce": 0.007238310761749744, + "loss_iou": 0.265625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 44519916, + "step": 459 + }, + { + "epoch": 0.0449745795854517, + "grad_norm": 5.038486791354875, + "learning_rate": 5e-05, + "loss": 0.1201, + "num_input_tokens_seen": 44617864, + "step": 460 + }, + { + "epoch": 0.0449745795854517, + "loss": 0.11299960315227509, + "loss_ce": 0.00505893724039197, + "loss_iou": 0.451171875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 44617864, + "step": 460 + }, + { + "epoch": 0.045072350410637466, + "grad_norm": 2.248667723856103, + "learning_rate": 5e-05, + "loss": 0.1187, + "num_input_tokens_seen": 44714800, + "step": 461 + }, + { + "epoch": 0.045072350410637466, + "loss": 0.0699014663696289, + "loss_ce": 0.004723548889160156, + "loss_iou": 0.28515625, + "loss_num": 0.0130615234375, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 44714800, + "step": 461 + }, + { + "epoch": 0.04517012123582323, + "grad_norm": 4.79647572008673, + "learning_rate": 5e-05, + "loss": 0.155, + "num_input_tokens_seen": 44811692, + "step": 462 + }, + { + "epoch": 0.04517012123582323, + "loss": 0.20358437299728394, + "loss_ce": 0.009103469550609589, + "loss_iou": 0.2490234375, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 44811692, + "step": 462 + }, + { + "epoch": 0.045267892061008994, + "grad_norm": 7.3839805460622285, + "learning_rate": 5e-05, + "loss": 0.2064, + "num_input_tokens_seen": 44908664, + "step": 463 + }, + { + "epoch": 0.045267892061008994, + "loss": 0.18365135788917542, + "loss_ce": 0.011562751606106758, + "loss_iou": 0.24609375, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 44908664, + "step": 463 + }, + { + "epoch": 0.04536566288619476, + "grad_norm": 5.977086516445661, + "learning_rate": 5e-05, + "loss": 0.1254, + "num_input_tokens_seen": 45005364, + "step": 464 + }, + { + "epoch": 0.04536566288619476, + "loss": 0.14769259095191956, + "loss_ce": 0.011004351079463959, + "loss_iou": 0.330078125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 45005364, + "step": 464 + }, + { + "epoch": 0.04546343371138052, + "grad_norm": 7.183031194567489, + "learning_rate": 5e-05, + "loss": 0.1922, + "num_input_tokens_seen": 45102520, + "step": 465 + }, + { + "epoch": 0.04546343371138052, + "loss": 0.28716033697128296, + "loss_ce": 0.00938933715224266, + "loss_iou": 0.31640625, + "loss_num": 0.0556640625, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 45102520, + "step": 465 + }, + { + "epoch": 0.045561204536566285, + "grad_norm": 4.425036371051363, + "learning_rate": 5e-05, + "loss": 0.1287, + "num_input_tokens_seen": 45199740, + "step": 466 + }, + { + "epoch": 0.045561204536566285, + "loss": 0.09955470263957977, + "loss_ce": 0.009009042754769325, + "loss_iou": 0.26171875, + "loss_num": 0.0181884765625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 45199740, + "step": 466 + }, + { + "epoch": 0.045658975361752056, + "grad_norm": 3.4915637353586186, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 45296164, + "step": 467 + }, + { + "epoch": 0.045658975361752056, + "loss": 0.10070209205150604, + "loss_ce": 0.005563544575124979, + "loss_iou": 0.423828125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 45296164, + "step": 467 + }, + { + "epoch": 0.04575674618693782, + "grad_norm": 8.338307750170355, + "learning_rate": 5e-05, + "loss": 0.2036, + "num_input_tokens_seen": 45393156, + "step": 468 + }, + { + "epoch": 0.04575674618693782, + "loss": 0.24327854812145233, + "loss_ce": 0.008384741842746735, + "loss_iou": 0.419921875, + "loss_num": 0.046875, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 45393156, + "step": 468 + }, + { + "epoch": 0.045854517012123584, + "grad_norm": 6.132621363204153, + "learning_rate": 5e-05, + "loss": 0.2157, + "num_input_tokens_seen": 45490764, + "step": 469 + }, + { + "epoch": 0.045854517012123584, + "loss": 0.21854552626609802, + "loss_ce": 0.006703958846628666, + "loss_iou": 0.291015625, + "loss_num": 0.042236328125, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 45490764, + "step": 469 + }, + { + "epoch": 0.04595228783730935, + "grad_norm": 4.769970505621851, + "learning_rate": 5e-05, + "loss": 0.1606, + "num_input_tokens_seen": 45589100, + "step": 470 + }, + { + "epoch": 0.04595228783730935, + "loss": 0.1915649026632309, + "loss_ce": 0.005804411601275206, + "loss_iou": 0.48828125, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 45589100, + "step": 470 + }, + { + "epoch": 0.04605005866249511, + "grad_norm": 5.98538138449846, + "learning_rate": 5e-05, + "loss": 0.1758, + "num_input_tokens_seen": 45686652, + "step": 471 + }, + { + "epoch": 0.04605005866249511, + "loss": 0.1580546796321869, + "loss_ce": 0.005924551282078028, + "loss_iou": 0.55859375, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 45686652, + "step": 471 + }, + { + "epoch": 0.046147829487680876, + "grad_norm": 8.252819585910956, + "learning_rate": 5e-05, + "loss": 0.1358, + "num_input_tokens_seen": 45784052, + "step": 472 + }, + { + "epoch": 0.046147829487680876, + "loss": 0.13447879254817963, + "loss_ce": 0.007708778604865074, + "loss_iou": 0.484375, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 45784052, + "step": 472 + }, + { + "epoch": 0.04624560031286664, + "grad_norm": 2.9821716185973854, + "learning_rate": 5e-05, + "loss": 0.1015, + "num_input_tokens_seen": 45880512, + "step": 473 + }, + { + "epoch": 0.04624560031286664, + "loss": 0.09107071161270142, + "loss_ce": 0.005377350840717554, + "loss_iou": 0.466796875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 45880512, + "step": 473 + }, + { + "epoch": 0.046343371138052404, + "grad_norm": 7.7729970105628805, + "learning_rate": 5e-05, + "loss": 0.1396, + "num_input_tokens_seen": 45977360, + "step": 474 + }, + { + "epoch": 0.046343371138052404, + "loss": 0.16146673262119293, + "loss_ce": 0.009489195421338081, + "loss_iou": 0.421875, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 45977360, + "step": 474 + }, + { + "epoch": 0.04644114196323817, + "grad_norm": 4.597077508366653, + "learning_rate": 5e-05, + "loss": 0.1658, + "num_input_tokens_seen": 46074204, + "step": 475 + }, + { + "epoch": 0.04644114196323817, + "loss": 0.1751500964164734, + "loss_ce": 0.004251658916473389, + "loss_iou": 0.396484375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 46074204, + "step": 475 + }, + { + "epoch": 0.04653891278842393, + "grad_norm": 50.052180747017744, + "learning_rate": 5e-05, + "loss": 0.1924, + "num_input_tokens_seen": 46171232, + "step": 476 + }, + { + "epoch": 0.04653891278842393, + "loss": 0.21276351809501648, + "loss_ce": 0.006769871339201927, + "loss_iou": 0.37890625, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 46171232, + "step": 476 + }, + { + "epoch": 0.046636683613609696, + "grad_norm": 5.127717358059845, + "learning_rate": 5e-05, + "loss": 0.1602, + "num_input_tokens_seen": 46268288, + "step": 477 + }, + { + "epoch": 0.046636683613609696, + "loss": 0.13559238612651825, + "loss_ce": 0.00522129749879241, + "loss_iou": 0.28515625, + "loss_num": 0.0260009765625, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 46268288, + "step": 477 + }, + { + "epoch": 0.04673445443879547, + "grad_norm": 6.718981028734716, + "learning_rate": 5e-05, + "loss": 0.1487, + "num_input_tokens_seen": 46365168, + "step": 478 + }, + { + "epoch": 0.04673445443879547, + "loss": 0.13941754400730133, + "loss_ce": 0.01106061041355133, + "loss_iou": 0.34765625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 46365168, + "step": 478 + }, + { + "epoch": 0.04683222526398123, + "grad_norm": 11.22116092182673, + "learning_rate": 5e-05, + "loss": 0.1729, + "num_input_tokens_seen": 46462104, + "step": 479 + }, + { + "epoch": 0.04683222526398123, + "loss": 0.1973521113395691, + "loss_ce": 0.01095074787735939, + "loss_iou": 0.26171875, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 46462104, + "step": 479 + }, + { + "epoch": 0.046929996089166995, + "grad_norm": 29.678655841557536, + "learning_rate": 5e-05, + "loss": 0.1531, + "num_input_tokens_seen": 46559744, + "step": 480 + }, + { + "epoch": 0.046929996089166995, + "loss": 0.11531445384025574, + "loss_ce": 0.0071296412497758865, + "loss_iou": 0.353515625, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 46559744, + "step": 480 + }, + { + "epoch": 0.04702776691435276, + "grad_norm": 6.019256428304304, + "learning_rate": 5e-05, + "loss": 0.1823, + "num_input_tokens_seen": 46657240, + "step": 481 + }, + { + "epoch": 0.04702776691435276, + "loss": 0.16023460030555725, + "loss_ce": 0.005479973740875721, + "loss_iou": 0.44140625, + "loss_num": 0.031005859375, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 46657240, + "step": 481 + }, + { + "epoch": 0.04712553773953852, + "grad_norm": 2.7920581332527212, + "learning_rate": 5e-05, + "loss": 0.1428, + "num_input_tokens_seen": 46755072, + "step": 482 + }, + { + "epoch": 0.04712553773953852, + "loss": 0.111736960709095, + "loss_ce": 0.006146138533949852, + "loss_iou": 0.51953125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 46755072, + "step": 482 + }, + { + "epoch": 0.04722330856472429, + "grad_norm": 8.273149266912057, + "learning_rate": 5e-05, + "loss": 0.2001, + "num_input_tokens_seen": 46852716, + "step": 483 + }, + { + "epoch": 0.04722330856472429, + "loss": 0.1903674155473709, + "loss_ce": 0.012633047997951508, + "loss_iou": 0.33203125, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 46852716, + "step": 483 + }, + { + "epoch": 0.04732107938991005, + "grad_norm": 4.77892867030868, + "learning_rate": 5e-05, + "loss": 0.1664, + "num_input_tokens_seen": 46949552, + "step": 484 + }, + { + "epoch": 0.04732107938991005, + "loss": 0.1567268818616867, + "loss_ce": 0.0064888522028923035, + "loss_iou": 0.443359375, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 46949552, + "step": 484 + }, + { + "epoch": 0.047418850215095815, + "grad_norm": 5.89367327954814, + "learning_rate": 5e-05, + "loss": 0.1949, + "num_input_tokens_seen": 47046456, + "step": 485 + }, + { + "epoch": 0.047418850215095815, + "loss": 0.2272479087114334, + "loss_ce": 0.008986195549368858, + "loss_iou": 0.322265625, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 47046456, + "step": 485 + }, + { + "epoch": 0.04751662104028158, + "grad_norm": 4.823877889687064, + "learning_rate": 5e-05, + "loss": 0.1982, + "num_input_tokens_seen": 47144208, + "step": 486 + }, + { + "epoch": 0.04751662104028158, + "loss": 0.23995260894298553, + "loss_ce": 0.0047231013886630535, + "loss_iou": 0.41015625, + "loss_num": 0.047119140625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 47144208, + "step": 486 + }, + { + "epoch": 0.04761439186546734, + "grad_norm": 6.159217553788309, + "learning_rate": 5e-05, + "loss": 0.1629, + "num_input_tokens_seen": 47241080, + "step": 487 + }, + { + "epoch": 0.04761439186546734, + "loss": 0.1745411455631256, + "loss_ce": 0.007152240723371506, + "loss_iou": 0.314453125, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 47241080, + "step": 487 + }, + { + "epoch": 0.04771216269065311, + "grad_norm": 6.370165635860628, + "learning_rate": 5e-05, + "loss": 0.1596, + "num_input_tokens_seen": 47337308, + "step": 488 + }, + { + "epoch": 0.04771216269065311, + "loss": 0.15767186880111694, + "loss_ce": 0.009173331782221794, + "loss_iou": 0.408203125, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 47337308, + "step": 488 + }, + { + "epoch": 0.04780993351583887, + "grad_norm": 6.3649169733328455, + "learning_rate": 5e-05, + "loss": 0.116, + "num_input_tokens_seen": 47434792, + "step": 489 + }, + { + "epoch": 0.04780993351583887, + "loss": 0.09449341148138046, + "loss_ce": 0.007945561781525612, + "loss_iou": 0.42578125, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 47434792, + "step": 489 + }, + { + "epoch": 0.04790770434102464, + "grad_norm": 4.336475795941794, + "learning_rate": 5e-05, + "loss": 0.2196, + "num_input_tokens_seen": 47531444, + "step": 490 + }, + { + "epoch": 0.04790770434102464, + "loss": 0.22106380760669708, + "loss_ce": 0.011743748560547829, + "loss_iou": 0.4140625, + "loss_num": 0.0419921875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 47531444, + "step": 490 + }, + { + "epoch": 0.048005475166210405, + "grad_norm": 8.49542732235371, + "learning_rate": 5e-05, + "loss": 0.1653, + "num_input_tokens_seen": 47628252, + "step": 491 + }, + { + "epoch": 0.048005475166210405, + "loss": 0.12983796000480652, + "loss_ce": 0.004967658780515194, + "loss_iou": 0.34765625, + "loss_num": 0.02490234375, + "loss_xval": 0.125, + "num_input_tokens_seen": 47628252, + "step": 491 + }, + { + "epoch": 0.04810324599139617, + "grad_norm": 4.311144623328762, + "learning_rate": 5e-05, + "loss": 0.1243, + "num_input_tokens_seen": 47725720, + "step": 492 + }, + { + "epoch": 0.04810324599139617, + "loss": 0.13781148195266724, + "loss_ce": 0.0036562117747962475, + "loss_iou": 0.51953125, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 47725720, + "step": 492 + }, + { + "epoch": 0.04820101681658193, + "grad_norm": 5.723772597638937, + "learning_rate": 5e-05, + "loss": 0.203, + "num_input_tokens_seen": 47822572, + "step": 493 + }, + { + "epoch": 0.04820101681658193, + "loss": 0.1928330659866333, + "loss_ce": 0.007164110895246267, + "loss_iou": 0.275390625, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 47822572, + "step": 493 + }, + { + "epoch": 0.0482987876417677, + "grad_norm": 7.748965468114441, + "learning_rate": 5e-05, + "loss": 0.1762, + "num_input_tokens_seen": 47920512, + "step": 494 + }, + { + "epoch": 0.0482987876417677, + "loss": 0.16204294562339783, + "loss_ce": 0.0088447080925107, + "loss_iou": 0.41015625, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 47920512, + "step": 494 + }, + { + "epoch": 0.04839655846695346, + "grad_norm": 10.412284204300422, + "learning_rate": 5e-05, + "loss": 0.2147, + "num_input_tokens_seen": 48017400, + "step": 495 + }, + { + "epoch": 0.04839655846695346, + "loss": 0.15560251474380493, + "loss_ce": 0.008141579106450081, + "loss_iou": 0.30859375, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 48017400, + "step": 495 + }, + { + "epoch": 0.048494329292139225, + "grad_norm": 4.722119326439738, + "learning_rate": 5e-05, + "loss": 0.117, + "num_input_tokens_seen": 48113916, + "step": 496 + }, + { + "epoch": 0.048494329292139225, + "loss": 0.14191441237926483, + "loss_ce": 0.00720981927588582, + "loss_iou": 0.3046875, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 48113916, + "step": 496 + }, + { + "epoch": 0.04859210011732499, + "grad_norm": 4.747094596755015, + "learning_rate": 5e-05, + "loss": 0.1382, + "num_input_tokens_seen": 48211788, + "step": 497 + }, + { + "epoch": 0.04859210011732499, + "loss": 0.14669574797153473, + "loss_ce": 0.008023872971534729, + "loss_iou": 0.43359375, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 48211788, + "step": 497 + }, + { + "epoch": 0.04868987094251075, + "grad_norm": 4.369829342912541, + "learning_rate": 5e-05, + "loss": 0.1599, + "num_input_tokens_seen": 48309540, + "step": 498 + }, + { + "epoch": 0.04868987094251075, + "loss": 0.12375691533088684, + "loss_ce": 0.006508384831249714, + "loss_iou": 0.494140625, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 48309540, + "step": 498 + }, + { + "epoch": 0.04878764176769652, + "grad_norm": 5.4017591547859345, + "learning_rate": 5e-05, + "loss": 0.1655, + "num_input_tokens_seen": 48406072, + "step": 499 + }, + { + "epoch": 0.04878764176769652, + "loss": 0.18463172018527985, + "loss_ce": 0.00799216516315937, + "loss_iou": 0.244140625, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 48406072, + "step": 499 + }, + { + "epoch": 0.04888541259288228, + "grad_norm": 7.298525550644252, + "learning_rate": 5e-05, + "loss": 0.1679, + "num_input_tokens_seen": 48503428, + "step": 500 + }, + { + "epoch": 0.04888541259288228, + "eval_seeclick_CIoU": 0.3799288123846054, + "eval_seeclick_GIoU": 0.3499634116888046, + "eval_seeclick_IoU": 0.43574216961860657, + "eval_seeclick_MAE_all": 0.08309679105877876, + "eval_seeclick_MAE_h": 0.03937210701406002, + "eval_seeclick_MAE_w": 0.11675798892974854, + "eval_seeclick_MAE_x": 0.13432593643665314, + "eval_seeclick_MAE_y": 0.0419311448931694, + "eval_seeclick_NUM_probability": 0.99795863032341, + "eval_seeclick_inside_bbox": 0.6931818127632141, + "eval_seeclick_loss": 0.3380654454231262, + "eval_seeclick_loss_ce": 0.02299127820879221, + "eval_seeclick_loss_iou": 0.6470947265625, + "eval_seeclick_loss_num": 0.0628662109375, + "eval_seeclick_loss_xval": 0.31396484375, + "eval_seeclick_runtime": 73.9059, + "eval_seeclick_samples_per_second": 0.582, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 48503428, + "step": 500 + }, + { + "epoch": 0.04888541259288228, + "eval_icons_CIoU": 0.4585985541343689, + "eval_icons_GIoU": 0.4463493824005127, + "eval_icons_IoU": 0.5017088353633881, + "eval_icons_MAE_all": 0.08551715314388275, + "eval_icons_MAE_h": 0.10464156046509743, + "eval_icons_MAE_w": 0.0806750413030386, + "eval_icons_MAE_x": 0.07468565553426743, + "eval_icons_MAE_y": 0.08206634595990181, + "eval_icons_NUM_probability": 0.9968151152133942, + "eval_icons_inside_bbox": 0.7916666567325592, + "eval_icons_loss": 0.2904282510280609, + "eval_icons_loss_ce": 0.00148358492879197, + "eval_icons_loss_iou": 0.44219970703125, + "eval_icons_loss_num": 0.06158447265625, + "eval_icons_loss_xval": 0.3077392578125, + "eval_icons_runtime": 83.8015, + "eval_icons_samples_per_second": 0.597, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 48503428, + "step": 500 + }, + { + "epoch": 0.04888541259288228, + "eval_screenspot_CIoU": 0.27294862767060596, + "eval_screenspot_GIoU": 0.2573075493176778, + "eval_screenspot_IoU": 0.3718930681546529, + "eval_screenspot_MAE_all": 0.16447073966264725, + "eval_screenspot_MAE_h": 0.11703578631083171, + "eval_screenspot_MAE_w": 0.23878484964370728, + "eval_screenspot_MAE_x": 0.2001253366470337, + "eval_screenspot_MAE_y": 0.10193699349959691, + "eval_screenspot_NUM_probability": 0.9981898069381714, + "eval_screenspot_inside_bbox": 0.6554166674613953, + "eval_screenspot_loss": 0.5901344418525696, + "eval_screenspot_loss_ce": 0.009798115119338036, + "eval_screenspot_loss_iou": 0.4787190755208333, + "eval_screenspot_loss_num": 0.11673990885416667, + "eval_screenspot_loss_xval": 0.5836588541666666, + "eval_screenspot_runtime": 153.9891, + "eval_screenspot_samples_per_second": 0.578, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 48503428, + "step": 500 + }, + { + "epoch": 0.04888541259288228, + "eval_compot_CIoU": 0.4502585232257843, + "eval_compot_GIoU": 0.4348968416452408, + "eval_compot_IoU": 0.506632924079895, + "eval_compot_MAE_all": 0.0923830196261406, + "eval_compot_MAE_h": 0.07442779093980789, + "eval_compot_MAE_w": 0.1192120723426342, + "eval_compot_MAE_x": 0.1082257516682148, + "eval_compot_MAE_y": 0.06766645237803459, + "eval_compot_NUM_probability": 0.9976800084114075, + "eval_compot_inside_bbox": 0.7361111044883728, + "eval_compot_loss": 0.4050656259059906, + "eval_compot_loss_ce": 0.10624613240361214, + "eval_compot_loss_iou": 0.486083984375, + "eval_compot_loss_num": 0.0576934814453125, + "eval_compot_loss_xval": 0.2884521484375, + "eval_compot_runtime": 85.0172, + "eval_compot_samples_per_second": 0.588, + "eval_compot_steps_per_second": 0.024, + "num_input_tokens_seen": 48503428, + "step": 500 + }, + { + "epoch": 0.04888541259288228, + "loss": 0.3645132780075073, + "loss_ce": 0.10578525066375732, + "loss_iou": 0.474609375, + "loss_num": 0.0517578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 48503428, + "step": 500 + }, + { + "epoch": 0.048983183418068045, + "grad_norm": 3.3690660431330444, + "learning_rate": 5e-05, + "loss": 0.1659, + "num_input_tokens_seen": 48600820, + "step": 501 + }, + { + "epoch": 0.048983183418068045, + "loss": 0.16893333196640015, + "loss_ce": 0.0037416829727590084, + "loss_iou": 0.41796875, + "loss_num": 0.033203125, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 48600820, + "step": 501 + }, + { + "epoch": 0.049080954243253816, + "grad_norm": 3.520768472697063, + "learning_rate": 5e-05, + "loss": 0.2199, + "num_input_tokens_seen": 48698740, + "step": 502 + }, + { + "epoch": 0.049080954243253816, + "loss": 0.19370225071907043, + "loss_ce": 0.007941767573356628, + "loss_iou": 0.478515625, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 48698740, + "step": 502 + }, + { + "epoch": 0.04917872506843958, + "grad_norm": 4.472458965955265, + "learning_rate": 5e-05, + "loss": 0.1661, + "num_input_tokens_seen": 48796356, + "step": 503 + }, + { + "epoch": 0.04917872506843958, + "loss": 0.16014572978019714, + "loss_ce": 0.009114233776926994, + "loss_iou": 0.392578125, + "loss_num": 0.0301513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 48796356, + "step": 503 + }, + { + "epoch": 0.049276495893625344, + "grad_norm": 7.452194808563328, + "learning_rate": 5e-05, + "loss": 0.138, + "num_input_tokens_seen": 48894356, + "step": 504 + }, + { + "epoch": 0.049276495893625344, + "loss": 0.20959490537643433, + "loss_ce": 0.009521669708192348, + "loss_iou": 0.458984375, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 48894356, + "step": 504 + }, + { + "epoch": 0.04937426671881111, + "grad_norm": 4.1755238825174565, + "learning_rate": 5e-05, + "loss": 0.1057, + "num_input_tokens_seen": 48991020, + "step": 505 + }, + { + "epoch": 0.04937426671881111, + "loss": 0.11961495876312256, + "loss_ce": 0.01201760582625866, + "loss_iou": 0.423828125, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 48991020, + "step": 505 + }, + { + "epoch": 0.04947203754399687, + "grad_norm": 3.6640038810916007, + "learning_rate": 5e-05, + "loss": 0.1847, + "num_input_tokens_seen": 49088136, + "step": 506 + }, + { + "epoch": 0.04947203754399687, + "loss": 0.21977245807647705, + "loss_ce": 0.011215317994356155, + "loss_iou": 0.494140625, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 49088136, + "step": 506 + }, + { + "epoch": 0.049569808369182636, + "grad_norm": 6.663685306196114, + "learning_rate": 5e-05, + "loss": 0.1873, + "num_input_tokens_seen": 49186248, + "step": 507 + }, + { + "epoch": 0.049569808369182636, + "loss": 0.1504230499267578, + "loss_ce": 0.008150102570652962, + "loss_iou": 0.47265625, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 49186248, + "step": 507 + }, + { + "epoch": 0.0496675791943684, + "grad_norm": 7.7681476866438315, + "learning_rate": 5e-05, + "loss": 0.1261, + "num_input_tokens_seen": 49282456, + "step": 508 + }, + { + "epoch": 0.0496675791943684, + "loss": 0.11771764606237411, + "loss_ce": 0.0060233077965676785, + "loss_iou": 0.359375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 49282456, + "step": 508 + }, + { + "epoch": 0.049765350019554164, + "grad_norm": 3.9672312995989687, + "learning_rate": 5e-05, + "loss": 0.1538, + "num_input_tokens_seen": 49379280, + "step": 509 + }, + { + "epoch": 0.049765350019554164, + "loss": 0.12041326612234116, + "loss_ce": 0.0062164864502847195, + "loss_iou": 0.337890625, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 49379280, + "step": 509 + }, + { + "epoch": 0.04986312084473993, + "grad_norm": 6.428447192411987, + "learning_rate": 5e-05, + "loss": 0.1432, + "num_input_tokens_seen": 49476020, + "step": 510 + }, + { + "epoch": 0.04986312084473993, + "loss": 0.1396549493074417, + "loss_ce": 0.006140535697340965, + "loss_iou": 0.37109375, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 49476020, + "step": 510 + }, + { + "epoch": 0.04996089166992569, + "grad_norm": 1.7427293956929697, + "learning_rate": 5e-05, + "loss": 0.0966, + "num_input_tokens_seen": 49572956, + "step": 511 + }, + { + "epoch": 0.04996089166992569, + "loss": 0.10317505151033401, + "loss_ce": 0.008025058545172215, + "loss_iou": 0.2578125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 49572956, + "step": 511 + }, + { + "epoch": 0.050058662495111456, + "grad_norm": 3.4736193076079136, + "learning_rate": 5e-05, + "loss": 0.1493, + "num_input_tokens_seen": 49669988, + "step": 512 + }, + { + "epoch": 0.050058662495111456, + "loss": 0.17333564162254333, + "loss_ce": 0.004573447164148092, + "loss_iou": 0.26953125, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 49669988, + "step": 512 + }, + { + "epoch": 0.050156433320297227, + "grad_norm": 4.354441747177931, + "learning_rate": 5e-05, + "loss": 0.1721, + "num_input_tokens_seen": 49767800, + "step": 513 + }, + { + "epoch": 0.050156433320297227, + "loss": 0.19011728465557098, + "loss_ce": 0.006340435240417719, + "loss_iou": 0.46484375, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 49767800, + "step": 513 + }, + { + "epoch": 0.05025420414548299, + "grad_norm": 8.236757632382192, + "learning_rate": 5e-05, + "loss": 0.175, + "num_input_tokens_seen": 49865708, + "step": 514 + }, + { + "epoch": 0.05025420414548299, + "loss": 0.16768503189086914, + "loss_ce": 0.019400114193558693, + "loss_iou": 0.349609375, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 49865708, + "step": 514 + }, + { + "epoch": 0.050351974970668754, + "grad_norm": 17.086978294172475, + "learning_rate": 5e-05, + "loss": 0.1497, + "num_input_tokens_seen": 49961552, + "step": 515 + }, + { + "epoch": 0.050351974970668754, + "loss": 0.15791729092597961, + "loss_ce": 0.00526835722848773, + "loss_iou": 0.4375, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 49961552, + "step": 515 + }, + { + "epoch": 0.05044974579585452, + "grad_norm": 5.576679531958197, + "learning_rate": 5e-05, + "loss": 0.1432, + "num_input_tokens_seen": 50058824, + "step": 516 + }, + { + "epoch": 0.05044974579585452, + "loss": 0.12616506218910217, + "loss_ce": 0.003331803483888507, + "loss_iou": 0.30078125, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 50058824, + "step": 516 + }, + { + "epoch": 0.05054751662104028, + "grad_norm": 4.4919777945341774, + "learning_rate": 5e-05, + "loss": 0.1201, + "num_input_tokens_seen": 50155560, + "step": 517 + }, + { + "epoch": 0.05054751662104028, + "loss": 0.10864914953708649, + "loss_ce": 0.006750958040356636, + "loss_iou": 0.408203125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 50155560, + "step": 517 + }, + { + "epoch": 0.050645287446226046, + "grad_norm": 3.1785001635543626, + "learning_rate": 5e-05, + "loss": 0.146, + "num_input_tokens_seen": 50250932, + "step": 518 + }, + { + "epoch": 0.050645287446226046, + "loss": 0.17355644702911377, + "loss_ce": 0.008151180110871792, + "loss_iou": 0.189453125, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 50250932, + "step": 518 + }, + { + "epoch": 0.05074305827141181, + "grad_norm": 9.36830647215384, + "learning_rate": 5e-05, + "loss": 0.1578, + "num_input_tokens_seen": 50348436, + "step": 519 + }, + { + "epoch": 0.05074305827141181, + "loss": 0.1744655966758728, + "loss_ce": 0.010342065244913101, + "loss_iou": 0.3828125, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 50348436, + "step": 519 + }, + { + "epoch": 0.050840829096597574, + "grad_norm": 5.182856771218037, + "learning_rate": 5e-05, + "loss": 0.2289, + "num_input_tokens_seen": 50445716, + "step": 520 + }, + { + "epoch": 0.050840829096597574, + "loss": 0.25339633226394653, + "loss_ce": 0.004983225837349892, + "loss_iou": 0.416015625, + "loss_num": 0.0498046875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 50445716, + "step": 520 + }, + { + "epoch": 0.05093859992178334, + "grad_norm": 19.906517914766006, + "learning_rate": 5e-05, + "loss": 0.175, + "num_input_tokens_seen": 50542084, + "step": 521 + }, + { + "epoch": 0.05093859992178334, + "loss": 0.2124967724084854, + "loss_ce": 0.012545588426291943, + "loss_iou": 0.341796875, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 50542084, + "step": 521 + }, + { + "epoch": 0.0510363707469691, + "grad_norm": 6.519546693272978, + "learning_rate": 5e-05, + "loss": 0.1295, + "num_input_tokens_seen": 50639232, + "step": 522 + }, + { + "epoch": 0.0510363707469691, + "loss": 0.08095307648181915, + "loss_ce": 0.0075430357828736305, + "loss_iou": 0.3671875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 50639232, + "step": 522 + }, + { + "epoch": 0.051134141572154866, + "grad_norm": 11.78705374116546, + "learning_rate": 5e-05, + "loss": 0.1037, + "num_input_tokens_seen": 50736088, + "step": 523 + }, + { + "epoch": 0.051134141572154866, + "loss": 0.09843708574771881, + "loss_ce": 0.0034969025291502476, + "loss_iou": 0.3515625, + "loss_num": 0.01904296875, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 50736088, + "step": 523 + }, + { + "epoch": 0.05123191239734063, + "grad_norm": 7.149252183644673, + "learning_rate": 5e-05, + "loss": 0.1279, + "num_input_tokens_seen": 50833968, + "step": 524 + }, + { + "epoch": 0.05123191239734063, + "loss": 0.15755681693553925, + "loss_ce": 0.006250661797821522, + "loss_iou": 0.384765625, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 50833968, + "step": 524 + }, + { + "epoch": 0.0513296832225264, + "grad_norm": 22.462589301895747, + "learning_rate": 5e-05, + "loss": 0.1444, + "num_input_tokens_seen": 50932004, + "step": 525 + }, + { + "epoch": 0.0513296832225264, + "loss": 0.10046899318695068, + "loss_ce": 0.0035451645962893963, + "loss_iou": 0.58203125, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 50932004, + "step": 525 + }, + { + "epoch": 0.051427454047712165, + "grad_norm": 21.62230796539008, + "learning_rate": 5e-05, + "loss": 0.1457, + "num_input_tokens_seen": 51029548, + "step": 526 + }, + { + "epoch": 0.051427454047712165, + "loss": 0.10715391486883163, + "loss_ce": 0.0057745156809687614, + "loss_iou": 0.318359375, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 51029548, + "step": 526 + }, + { + "epoch": 0.05152522487289793, + "grad_norm": 8.248851802663792, + "learning_rate": 5e-05, + "loss": 0.1432, + "num_input_tokens_seen": 51125844, + "step": 527 + }, + { + "epoch": 0.05152522487289793, + "loss": 0.1256190687417984, + "loss_ce": 0.008572719991207123, + "loss_iou": 0.326171875, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 51125844, + "step": 527 + }, + { + "epoch": 0.05162299569808369, + "grad_norm": 4.08028455832857, + "learning_rate": 5e-05, + "loss": 0.1927, + "num_input_tokens_seen": 51222916, + "step": 528 + }, + { + "epoch": 0.05162299569808369, + "loss": 0.25113189220428467, + "loss_ce": 0.01169097051024437, + "loss_iou": 0.24609375, + "loss_num": 0.0478515625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 51222916, + "step": 528 + }, + { + "epoch": 0.05172076652326946, + "grad_norm": 9.406312287056878, + "learning_rate": 5e-05, + "loss": 0.1614, + "num_input_tokens_seen": 51320032, + "step": 529 + }, + { + "epoch": 0.05172076652326946, + "loss": 0.17998608946800232, + "loss_ce": 0.010247327387332916, + "loss_iou": 0.43359375, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 51320032, + "step": 529 + }, + { + "epoch": 0.05181853734845522, + "grad_norm": 20.760161170521272, + "learning_rate": 5e-05, + "loss": 0.2106, + "num_input_tokens_seen": 51416476, + "step": 530 + }, + { + "epoch": 0.05181853734845522, + "loss": 0.20119550824165344, + "loss_ce": 0.005272654816508293, + "loss_iou": 0.29296875, + "loss_num": 0.0390625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 51416476, + "step": 530 + }, + { + "epoch": 0.051916308173640985, + "grad_norm": 10.891267854284179, + "learning_rate": 5e-05, + "loss": 0.2823, + "num_input_tokens_seen": 51513564, + "step": 531 + }, + { + "epoch": 0.051916308173640985, + "loss": 0.27392467856407166, + "loss_ce": 0.014647331088781357, + "loss_iou": 0.34765625, + "loss_num": 0.0517578125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 51513564, + "step": 531 + }, + { + "epoch": 0.05201407899882675, + "grad_norm": 37.913405763641514, + "learning_rate": 5e-05, + "loss": 0.533, + "num_input_tokens_seen": 51610992, + "step": 532 + }, + { + "epoch": 0.05201407899882675, + "loss": 0.5406402945518494, + "loss_ce": 0.08019108325242996, + "loss_iou": 0.494140625, + "loss_num": 0.09228515625, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 51610992, + "step": 532 + }, + { + "epoch": 0.05211184982401251, + "grad_norm": 27.897063849173595, + "learning_rate": 5e-05, + "loss": 0.4854, + "num_input_tokens_seen": 51707964, + "step": 533 + }, + { + "epoch": 0.05211184982401251, + "loss": 0.5097014307975769, + "loss_ce": 0.2374236136674881, + "loss_iou": 0.38671875, + "loss_num": 0.054443359375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 51707964, + "step": 533 + }, + { + "epoch": 0.05220962064919828, + "grad_norm": 23.407208994842797, + "learning_rate": 5e-05, + "loss": 0.3373, + "num_input_tokens_seen": 51804892, + "step": 534 + }, + { + "epoch": 0.05220962064919828, + "loss": 0.3546872138977051, + "loss_ce": 0.23850679397583008, + "loss_iou": 0.40625, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 51804892, + "step": 534 + }, + { + "epoch": 0.05230739147438404, + "grad_norm": 9.975258490734396, + "learning_rate": 5e-05, + "loss": 0.1953, + "num_input_tokens_seen": 51902328, + "step": 535 + }, + { + "epoch": 0.05230739147438404, + "loss": 0.19158372282981873, + "loss_ce": 0.007196506485342979, + "loss_iou": 0.296875, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 51902328, + "step": 535 + }, + { + "epoch": 0.05240516229956981, + "grad_norm": 10.0396397581514, + "learning_rate": 5e-05, + "loss": 0.1971, + "num_input_tokens_seen": 51999624, + "step": 536 + }, + { + "epoch": 0.05240516229956981, + "loss": 0.16984686255455017, + "loss_ce": 0.005723331123590469, + "loss_iou": 0.50390625, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 51999624, + "step": 536 + }, + { + "epoch": 0.052502933124755576, + "grad_norm": 10.953037028397947, + "learning_rate": 5e-05, + "loss": 0.2185, + "num_input_tokens_seen": 52096768, + "step": 537 + }, + { + "epoch": 0.052502933124755576, + "loss": 0.21009153127670288, + "loss_ce": 0.012825911864638329, + "loss_iou": 0.34765625, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 52096768, + "step": 537 + }, + { + "epoch": 0.05260070394994134, + "grad_norm": 5.72417359121694, + "learning_rate": 5e-05, + "loss": 0.1538, + "num_input_tokens_seen": 52193264, + "step": 538 + }, + { + "epoch": 0.05260070394994134, + "loss": 0.1971423178911209, + "loss_ce": 0.011351300403475761, + "loss_iou": 0.4140625, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 52193264, + "step": 538 + }, + { + "epoch": 0.052698474775127103, + "grad_norm": 5.376894733564098, + "learning_rate": 5e-05, + "loss": 0.1417, + "num_input_tokens_seen": 52290620, + "step": 539 + }, + { + "epoch": 0.052698474775127103, + "loss": 0.14767354726791382, + "loss_ce": 0.005522668361663818, + "loss_iou": 0.54296875, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 52290620, + "step": 539 + }, + { + "epoch": 0.05279624560031287, + "grad_norm": 7.821732411835444, + "learning_rate": 5e-05, + "loss": 0.1753, + "num_input_tokens_seen": 52388092, + "step": 540 + }, + { + "epoch": 0.05279624560031287, + "loss": 0.17167305946350098, + "loss_ce": 0.006084670312702656, + "loss_iou": 0.35546875, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 52388092, + "step": 540 + }, + { + "epoch": 0.05289401642549863, + "grad_norm": 8.282057811618456, + "learning_rate": 5e-05, + "loss": 0.1674, + "num_input_tokens_seen": 52484740, + "step": 541 + }, + { + "epoch": 0.05289401642549863, + "loss": 0.1299392133951187, + "loss_ce": 0.005213877186179161, + "loss_iou": 0.40234375, + "loss_num": 0.0250244140625, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 52484740, + "step": 541 + }, + { + "epoch": 0.052991787250684395, + "grad_norm": 4.8515718228261875, + "learning_rate": 5e-05, + "loss": 0.1505, + "num_input_tokens_seen": 52581260, + "step": 542 + }, + { + "epoch": 0.052991787250684395, + "loss": 0.10591436922550201, + "loss_ce": 0.0036499681882560253, + "loss_iou": 0.51171875, + "loss_num": 0.0205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 52581260, + "step": 542 + }, + { + "epoch": 0.05308955807587016, + "grad_norm": 5.373597151072147, + "learning_rate": 5e-05, + "loss": 0.1664, + "num_input_tokens_seen": 52677372, + "step": 543 + }, + { + "epoch": 0.05308955807587016, + "loss": 0.20608113706111908, + "loss_ce": 0.00622151792049408, + "loss_iou": 0.32421875, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 52677372, + "step": 543 + }, + { + "epoch": 0.05318732890105592, + "grad_norm": 3.7624051190844523, + "learning_rate": 5e-05, + "loss": 0.1264, + "num_input_tokens_seen": 52774696, + "step": 544 + }, + { + "epoch": 0.05318732890105592, + "loss": 0.12192384898662567, + "loss_ce": 0.006689468864351511, + "loss_iou": 0.28125, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 52774696, + "step": 544 + }, + { + "epoch": 0.05328509972624169, + "grad_norm": 18.808734428712896, + "learning_rate": 5e-05, + "loss": 0.2013, + "num_input_tokens_seen": 52872716, + "step": 545 + }, + { + "epoch": 0.05328509972624169, + "loss": 0.18697893619537354, + "loss_ce": 0.0075355707667768, + "loss_iou": 0.44921875, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 52872716, + "step": 545 + }, + { + "epoch": 0.05338287055142745, + "grad_norm": 5.467307811867144, + "learning_rate": 5e-05, + "loss": 0.1514, + "num_input_tokens_seen": 52969456, + "step": 546 + }, + { + "epoch": 0.05338287055142745, + "loss": 0.17758405208587646, + "loss_ce": 0.0062278397381305695, + "loss_iou": 0.3984375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 52969456, + "step": 546 + }, + { + "epoch": 0.053480641376613215, + "grad_norm": 11.674032472989005, + "learning_rate": 5e-05, + "loss": 0.1564, + "num_input_tokens_seen": 53066640, + "step": 547 + }, + { + "epoch": 0.053480641376613215, + "loss": 0.13117222487926483, + "loss_ce": 0.00959019549190998, + "loss_iou": 0.494140625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 53066640, + "step": 547 + }, + { + "epoch": 0.053578412201798986, + "grad_norm": 4.9415132794090475, + "learning_rate": 5e-05, + "loss": 0.21, + "num_input_tokens_seen": 53163336, + "step": 548 + }, + { + "epoch": 0.053578412201798986, + "loss": 0.23969894647598267, + "loss_ce": 0.005537569988518953, + "loss_iou": 0.3984375, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 53163336, + "step": 548 + }, + { + "epoch": 0.05367618302698475, + "grad_norm": 6.990775483778433, + "learning_rate": 5e-05, + "loss": 0.1583, + "num_input_tokens_seen": 53259796, + "step": 549 + }, + { + "epoch": 0.05367618302698475, + "loss": 0.14262421429157257, + "loss_ce": 0.009262394160032272, + "loss_iou": 0.345703125, + "loss_num": 0.026611328125, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 53259796, + "step": 549 + }, + { + "epoch": 0.053773953852170514, + "grad_norm": 4.403913353281761, + "learning_rate": 5e-05, + "loss": 0.137, + "num_input_tokens_seen": 53356796, + "step": 550 + }, + { + "epoch": 0.053773953852170514, + "loss": 0.1372445821762085, + "loss_ce": 0.00989473145455122, + "loss_iou": 0.40625, + "loss_num": 0.0255126953125, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 53356796, + "step": 550 + }, + { + "epoch": 0.05387172467735628, + "grad_norm": 7.924832746644745, + "learning_rate": 5e-05, + "loss": 0.1763, + "num_input_tokens_seen": 53453208, + "step": 551 + }, + { + "epoch": 0.05387172467735628, + "loss": 0.1640503853559494, + "loss_ce": 0.003939907997846603, + "loss_iou": 0.28125, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 53453208, + "step": 551 + }, + { + "epoch": 0.05396949550254204, + "grad_norm": 14.364502017312777, + "learning_rate": 5e-05, + "loss": 0.1585, + "num_input_tokens_seen": 53549464, + "step": 552 + }, + { + "epoch": 0.05396949550254204, + "loss": 0.14190220832824707, + "loss_ce": 0.004649395123124123, + "loss_iou": 0.267578125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 53549464, + "step": 552 + }, + { + "epoch": 0.054067266327727806, + "grad_norm": 8.979434515094272, + "learning_rate": 5e-05, + "loss": 0.2223, + "num_input_tokens_seen": 53646668, + "step": 553 + }, + { + "epoch": 0.054067266327727806, + "loss": 0.2551429867744446, + "loss_ce": 0.021866634488105774, + "loss_iou": 0.376953125, + "loss_num": 0.046630859375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 53646668, + "step": 553 + }, + { + "epoch": 0.05416503715291357, + "grad_norm": 5.031537992994603, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 53742836, + "step": 554 + }, + { + "epoch": 0.05416503715291357, + "loss": 0.09564336389303207, + "loss_ce": 0.011048637330532074, + "loss_iou": 0.25390625, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 53742836, + "step": 554 + }, + { + "epoch": 0.054262807978099334, + "grad_norm": 8.555650528407654, + "learning_rate": 5e-05, + "loss": 0.1519, + "num_input_tokens_seen": 53840072, + "step": 555 + }, + { + "epoch": 0.054262807978099334, + "loss": 0.09958596527576447, + "loss_ce": 0.008765649981796741, + "loss_iou": 0.447265625, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 53840072, + "step": 555 + }, + { + "epoch": 0.0543605788032851, + "grad_norm": 3.4765829996769315, + "learning_rate": 5e-05, + "loss": 0.1969, + "num_input_tokens_seen": 53936668, + "step": 556 + }, + { + "epoch": 0.0543605788032851, + "loss": 0.18922121822834015, + "loss_ce": 0.007824739441275597, + "loss_iou": 0.349609375, + "loss_num": 0.0361328125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 53936668, + "step": 556 + }, + { + "epoch": 0.05445834962847086, + "grad_norm": 4.261579462329696, + "learning_rate": 5e-05, + "loss": 0.1417, + "num_input_tokens_seen": 54033056, + "step": 557 + }, + { + "epoch": 0.05445834962847086, + "loss": 0.22052448987960815, + "loss_ce": 0.009113968349993229, + "loss_iou": 0.2578125, + "loss_num": 0.042236328125, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 54033056, + "step": 557 + }, + { + "epoch": 0.054556120453656626, + "grad_norm": 13.314298484809784, + "learning_rate": 5e-05, + "loss": 0.1439, + "num_input_tokens_seen": 54129404, + "step": 558 + }, + { + "epoch": 0.054556120453656626, + "loss": 0.13026204705238342, + "loss_ce": 0.008100176230072975, + "loss_iou": 0.32421875, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 54129404, + "step": 558 + }, + { + "epoch": 0.0546538912788424, + "grad_norm": 6.882586657486078, + "learning_rate": 5e-05, + "loss": 0.2189, + "num_input_tokens_seen": 54226320, + "step": 559 + }, + { + "epoch": 0.0546538912788424, + "loss": 0.2524586617946625, + "loss_ce": 0.00795181468129158, + "loss_iou": 0.37890625, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 54226320, + "step": 559 + }, + { + "epoch": 0.05475166210402816, + "grad_norm": 4.4035635157748105, + "learning_rate": 5e-05, + "loss": 0.2178, + "num_input_tokens_seen": 54323412, + "step": 560 + }, + { + "epoch": 0.05475166210402816, + "loss": 0.33260756731033325, + "loss_ce": 0.008114155381917953, + "loss_iou": 0.333984375, + "loss_num": 0.06494140625, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 54323412, + "step": 560 + }, + { + "epoch": 0.054849432929213925, + "grad_norm": 4.154978516811744, + "learning_rate": 5e-05, + "loss": 0.1579, + "num_input_tokens_seen": 54420320, + "step": 561 + }, + { + "epoch": 0.054849432929213925, + "loss": 0.1272122710943222, + "loss_ce": 0.006072741001844406, + "loss_iou": 0.421875, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 54420320, + "step": 561 + }, + { + "epoch": 0.05494720375439969, + "grad_norm": 13.202851377995994, + "learning_rate": 5e-05, + "loss": 0.1685, + "num_input_tokens_seen": 54517068, + "step": 562 + }, + { + "epoch": 0.05494720375439969, + "loss": 0.15341231226921082, + "loss_ce": 0.004883269779384136, + "loss_iou": 0.44140625, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 54517068, + "step": 562 + }, + { + "epoch": 0.05504497457958545, + "grad_norm": 3.030777534539237, + "learning_rate": 5e-05, + "loss": 0.1211, + "num_input_tokens_seen": 54614608, + "step": 563 + }, + { + "epoch": 0.05504497457958545, + "loss": 0.12149754166603088, + "loss_ce": 0.006293692160397768, + "loss_iou": 0.41796875, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 54614608, + "step": 563 + }, + { + "epoch": 0.055142745404771216, + "grad_norm": 5.962391326571526, + "learning_rate": 5e-05, + "loss": 0.1684, + "num_input_tokens_seen": 54711732, + "step": 564 + }, + { + "epoch": 0.055142745404771216, + "loss": 0.10435719788074493, + "loss_ce": 0.005907484330236912, + "loss_iou": 0.373046875, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 54711732, + "step": 564 + }, + { + "epoch": 0.05524051622995698, + "grad_norm": 7.268576536906544, + "learning_rate": 5e-05, + "loss": 0.1643, + "num_input_tokens_seen": 54808912, + "step": 565 + }, + { + "epoch": 0.05524051622995698, + "loss": 0.20475086569786072, + "loss_ce": 0.006264536175876856, + "loss_iou": 0.357421875, + "loss_num": 0.039794921875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 54808912, + "step": 565 + }, + { + "epoch": 0.055338287055142744, + "grad_norm": 15.52220128302993, + "learning_rate": 5e-05, + "loss": 0.1416, + "num_input_tokens_seen": 54904960, + "step": 566 + }, + { + "epoch": 0.055338287055142744, + "loss": 0.16893108189105988, + "loss_ce": 0.003823353908956051, + "loss_iou": 0.275390625, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 54904960, + "step": 566 + }, + { + "epoch": 0.05543605788032851, + "grad_norm": 14.044440428199927, + "learning_rate": 5e-05, + "loss": 0.1417, + "num_input_tokens_seen": 55003436, + "step": 567 + }, + { + "epoch": 0.05543605788032851, + "loss": 0.14902931451797485, + "loss_ce": 0.0014920870307832956, + "loss_iou": 0.53515625, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 55003436, + "step": 567 + }, + { + "epoch": 0.05553382870551427, + "grad_norm": 3.8399051322459683, + "learning_rate": 5e-05, + "loss": 0.1334, + "num_input_tokens_seen": 55099976, + "step": 568 + }, + { + "epoch": 0.05553382870551427, + "loss": 0.1440257579088211, + "loss_ce": 0.003583865240216255, + "loss_iou": 0.3984375, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 55099976, + "step": 568 + }, + { + "epoch": 0.055631599530700036, + "grad_norm": 6.6969893723709175, + "learning_rate": 5e-05, + "loss": 0.1548, + "num_input_tokens_seen": 55197184, + "step": 569 + }, + { + "epoch": 0.055631599530700036, + "loss": 0.161346435546875, + "loss_ce": 0.006530769634991884, + "loss_iou": 0.380859375, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 55197184, + "step": 569 + }, + { + "epoch": 0.0557293703558858, + "grad_norm": 4.4386369566405985, + "learning_rate": 5e-05, + "loss": 0.1498, + "num_input_tokens_seen": 55292944, + "step": 570 + }, + { + "epoch": 0.0557293703558858, + "loss": 0.1844465732574463, + "loss_ce": 0.004606474190950394, + "loss_iou": 0.22265625, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 55292944, + "step": 570 + }, + { + "epoch": 0.05582714118107157, + "grad_norm": 6.214903516011052, + "learning_rate": 5e-05, + "loss": 0.1391, + "num_input_tokens_seen": 55390220, + "step": 571 + }, + { + "epoch": 0.05582714118107157, + "loss": 0.1451554298400879, + "loss_ce": 0.007246495224535465, + "loss_iou": 0.37890625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 55390220, + "step": 571 + }, + { + "epoch": 0.055924912006257335, + "grad_norm": 6.177798495085715, + "learning_rate": 5e-05, + "loss": 0.1532, + "num_input_tokens_seen": 55486916, + "step": 572 + }, + { + "epoch": 0.055924912006257335, + "loss": 0.18468374013900757, + "loss_ce": 0.006453466601669788, + "loss_iou": 0.2314453125, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 55486916, + "step": 572 + }, + { + "epoch": 0.0560226828314431, + "grad_norm": 7.02210511740415, + "learning_rate": 5e-05, + "loss": 0.1437, + "num_input_tokens_seen": 55583680, + "step": 573 + }, + { + "epoch": 0.0560226828314431, + "loss": 0.1374773234128952, + "loss_ce": 0.004176532384008169, + "loss_iou": 0.470703125, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 55583680, + "step": 573 + }, + { + "epoch": 0.05612045365662886, + "grad_norm": 4.08352680436933, + "learning_rate": 5e-05, + "loss": 0.1606, + "num_input_tokens_seen": 55680944, + "step": 574 + }, + { + "epoch": 0.05612045365662886, + "loss": 0.13307611644268036, + "loss_ce": 0.005909367464482784, + "loss_iou": 0.408203125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 55680944, + "step": 574 + }, + { + "epoch": 0.05621822448181463, + "grad_norm": 4.315603594722635, + "learning_rate": 5e-05, + "loss": 0.1659, + "num_input_tokens_seen": 55778200, + "step": 575 + }, + { + "epoch": 0.05621822448181463, + "loss": 0.16750670969486237, + "loss_ce": 0.007350460160523653, + "loss_iou": 0.392578125, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 55778200, + "step": 575 + }, + { + "epoch": 0.05631599530700039, + "grad_norm": 12.79609997710089, + "learning_rate": 5e-05, + "loss": 0.1492, + "num_input_tokens_seen": 55875504, + "step": 576 + }, + { + "epoch": 0.05631599530700039, + "loss": 0.17049123346805573, + "loss_ce": 0.012166035361588001, + "loss_iou": 0.388671875, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 55875504, + "step": 576 + }, + { + "epoch": 0.056413766132186155, + "grad_norm": 3.5204577313498566, + "learning_rate": 5e-05, + "loss": 0.1819, + "num_input_tokens_seen": 55972912, + "step": 577 + }, + { + "epoch": 0.056413766132186155, + "loss": 0.20535314083099365, + "loss_ce": 0.003998154308646917, + "loss_iou": 0.427734375, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 55972912, + "step": 577 + }, + { + "epoch": 0.05651153695737192, + "grad_norm": 2.406610625187719, + "learning_rate": 5e-05, + "loss": 0.1395, + "num_input_tokens_seen": 56069608, + "step": 578 + }, + { + "epoch": 0.05651153695737192, + "loss": 0.18540802597999573, + "loss_ce": 0.0073074353858828545, + "loss_iou": 0.34765625, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 56069608, + "step": 578 + }, + { + "epoch": 0.05660930778255768, + "grad_norm": 3.8688930031942075, + "learning_rate": 5e-05, + "loss": 0.1261, + "num_input_tokens_seen": 56167148, + "step": 579 + }, + { + "epoch": 0.05660930778255768, + "loss": 0.12308663129806519, + "loss_ce": 0.0043732523918151855, + "loss_iou": 0.41015625, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 56167148, + "step": 579 + }, + { + "epoch": 0.05670707860774345, + "grad_norm": 3.135014545835595, + "learning_rate": 5e-05, + "loss": 0.1188, + "num_input_tokens_seen": 56264964, + "step": 580 + }, + { + "epoch": 0.05670707860774345, + "loss": 0.08967848122119904, + "loss_ce": 0.001970970770344138, + "loss_iou": 0.3515625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 56264964, + "step": 580 + }, + { + "epoch": 0.05680484943292921, + "grad_norm": 3.2326219284527857, + "learning_rate": 5e-05, + "loss": 0.1265, + "num_input_tokens_seen": 56360792, + "step": 581 + }, + { + "epoch": 0.05680484943292921, + "loss": 0.1405835747718811, + "loss_ce": 0.0032086949795484543, + "loss_iou": 0.314453125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 56360792, + "step": 581 + }, + { + "epoch": 0.05690262025811498, + "grad_norm": 5.201231941696852, + "learning_rate": 5e-05, + "loss": 0.149, + "num_input_tokens_seen": 56458188, + "step": 582 + }, + { + "epoch": 0.05690262025811498, + "loss": 0.15796779096126556, + "loss_ce": 0.0030605639331042767, + "loss_iou": 0.42578125, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 56458188, + "step": 582 + }, + { + "epoch": 0.057000391083300746, + "grad_norm": 3.5141215487107953, + "learning_rate": 5e-05, + "loss": 0.1438, + "num_input_tokens_seen": 56554752, + "step": 583 + }, + { + "epoch": 0.057000391083300746, + "loss": 0.12010706961154938, + "loss_ce": 0.0021108449436724186, + "loss_iou": 0.31640625, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 56554752, + "step": 583 + }, + { + "epoch": 0.05709816190848651, + "grad_norm": 5.146088739912364, + "learning_rate": 5e-05, + "loss": 0.1556, + "num_input_tokens_seen": 56651880, + "step": 584 + }, + { + "epoch": 0.05709816190848651, + "loss": 0.1674424558877945, + "loss_ce": 0.006370682269334793, + "loss_iou": 0.3984375, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 56651880, + "step": 584 + }, + { + "epoch": 0.057195932733672274, + "grad_norm": 2.623076640686541, + "learning_rate": 5e-05, + "loss": 0.1352, + "num_input_tokens_seen": 56748332, + "step": 585 + }, + { + "epoch": 0.057195932733672274, + "loss": 0.14396822452545166, + "loss_ce": 0.0031906296499073505, + "loss_iou": 0.453125, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 56748332, + "step": 585 + }, + { + "epoch": 0.05729370355885804, + "grad_norm": 23.682983331331805, + "learning_rate": 5e-05, + "loss": 0.1843, + "num_input_tokens_seen": 56845548, + "step": 586 + }, + { + "epoch": 0.05729370355885804, + "loss": 0.17837390303611755, + "loss_ce": 0.007261841092258692, + "loss_iou": 0.416015625, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 56845548, + "step": 586 + }, + { + "epoch": 0.0573914743840438, + "grad_norm": 2.6899083263558343, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 56942076, + "step": 587 + }, + { + "epoch": 0.0573914743840438, + "loss": 0.0850241407752037, + "loss_ce": 0.00238253572024405, + "loss_iou": 0.322265625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 56942076, + "step": 587 + }, + { + "epoch": 0.057489245209229566, + "grad_norm": 3.7149391784992294, + "learning_rate": 5e-05, + "loss": 0.1308, + "num_input_tokens_seen": 57039600, + "step": 588 + }, + { + "epoch": 0.057489245209229566, + "loss": 0.14361128211021423, + "loss_ce": 0.0035966315772384405, + "loss_iou": 0.39453125, + "loss_num": 0.028076171875, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 57039600, + "step": 588 + }, + { + "epoch": 0.05758701603441533, + "grad_norm": 4.658609514252163, + "learning_rate": 5e-05, + "loss": 0.1217, + "num_input_tokens_seen": 57136376, + "step": 589 + }, + { + "epoch": 0.05758701603441533, + "loss": 0.1434396505355835, + "loss_ce": 0.0035470714792609215, + "loss_iou": 0.34375, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 57136376, + "step": 589 + }, + { + "epoch": 0.057684786859601093, + "grad_norm": 6.369036860808012, + "learning_rate": 5e-05, + "loss": 0.1516, + "num_input_tokens_seen": 57232860, + "step": 590 + }, + { + "epoch": 0.057684786859601093, + "loss": 0.17247462272644043, + "loss_ce": 0.0091445567086339, + "loss_iou": 0.228515625, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 57232860, + "step": 590 + }, + { + "epoch": 0.05778255768478686, + "grad_norm": 6.118189023690717, + "learning_rate": 5e-05, + "loss": 0.1284, + "num_input_tokens_seen": 57330116, + "step": 591 + }, + { + "epoch": 0.05778255768478686, + "loss": 0.11256353557109833, + "loss_ce": 0.006820128299295902, + "loss_iou": 0.4921875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 57330116, + "step": 591 + }, + { + "epoch": 0.05788032850997262, + "grad_norm": 6.554126206198126, + "learning_rate": 5e-05, + "loss": 0.2103, + "num_input_tokens_seen": 57427488, + "step": 592 + }, + { + "epoch": 0.05788032850997262, + "loss": 0.19432422518730164, + "loss_ce": 0.005008431617170572, + "loss_iou": 0.30859375, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 57427488, + "step": 592 + }, + { + "epoch": 0.057978099335158385, + "grad_norm": 5.334463125890769, + "learning_rate": 5e-05, + "loss": 0.1583, + "num_input_tokens_seen": 57523848, + "step": 593 + }, + { + "epoch": 0.057978099335158385, + "loss": 0.13020402193069458, + "loss_ce": 0.007645425386726856, + "loss_iou": 0.390625, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 57523848, + "step": 593 + }, + { + "epoch": 0.058075870160344156, + "grad_norm": 7.928374854730178, + "learning_rate": 5e-05, + "loss": 0.128, + "num_input_tokens_seen": 57621440, + "step": 594 + }, + { + "epoch": 0.058075870160344156, + "loss": 0.12639792263507843, + "loss_ce": 0.007318336516618729, + "loss_iou": 0.31640625, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 57621440, + "step": 594 + }, + { + "epoch": 0.05817364098552992, + "grad_norm": 3.687018124947224, + "learning_rate": 5e-05, + "loss": 0.1306, + "num_input_tokens_seen": 57719288, + "step": 595 + }, + { + "epoch": 0.05817364098552992, + "loss": 0.13183686137199402, + "loss_ce": 0.004822701681405306, + "loss_iou": 0.400390625, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 57719288, + "step": 595 + }, + { + "epoch": 0.058271411810715684, + "grad_norm": 4.377511097156803, + "learning_rate": 5e-05, + "loss": 0.103, + "num_input_tokens_seen": 57816988, + "step": 596 + }, + { + "epoch": 0.058271411810715684, + "loss": 0.08959800004959106, + "loss_ce": 0.004759130068123341, + "loss_iou": 0.5078125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 57816988, + "step": 596 + }, + { + "epoch": 0.05836918263590145, + "grad_norm": 5.59355490668921, + "learning_rate": 5e-05, + "loss": 0.1509, + "num_input_tokens_seen": 57913536, + "step": 597 + }, + { + "epoch": 0.05836918263590145, + "loss": 0.1426202654838562, + "loss_ce": 0.004711332730948925, + "loss_iou": 0.5234375, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 57913536, + "step": 597 + }, + { + "epoch": 0.05846695346108721, + "grad_norm": 8.48053055120246, + "learning_rate": 5e-05, + "loss": 0.1746, + "num_input_tokens_seen": 58010348, + "step": 598 + }, + { + "epoch": 0.05846695346108721, + "loss": 0.1808190643787384, + "loss_ce": 0.0056786807253956795, + "loss_iou": 0.263671875, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 58010348, + "step": 598 + }, + { + "epoch": 0.058564724286272976, + "grad_norm": 5.349359804968748, + "learning_rate": 5e-05, + "loss": 0.0979, + "num_input_tokens_seen": 58107424, + "step": 599 + }, + { + "epoch": 0.058564724286272976, + "loss": 0.09703558683395386, + "loss_ce": 0.004346071742475033, + "loss_iou": 0.447265625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 58107424, + "step": 599 + }, + { + "epoch": 0.05866249511145874, + "grad_norm": 8.463989065020051, + "learning_rate": 5e-05, + "loss": 0.1247, + "num_input_tokens_seen": 58204708, + "step": 600 + }, + { + "epoch": 0.05866249511145874, + "loss": 0.150901660323143, + "loss_ce": 0.007301194127649069, + "loss_iou": 0.46875, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 58204708, + "step": 600 + }, + { + "epoch": 0.058760265936644504, + "grad_norm": 3.9022959746382355, + "learning_rate": 5e-05, + "loss": 0.1789, + "num_input_tokens_seen": 58300236, + "step": 601 + }, + { + "epoch": 0.058760265936644504, + "loss": 0.14476847648620605, + "loss_ce": 0.006081349216401577, + "loss_iou": 0.234375, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 58300236, + "step": 601 + }, + { + "epoch": 0.05885803676183027, + "grad_norm": 3.9605240712417835, + "learning_rate": 5e-05, + "loss": 0.1592, + "num_input_tokens_seen": 58398040, + "step": 602 + }, + { + "epoch": 0.05885803676183027, + "loss": 0.15888464450836182, + "loss_ce": 0.006357787176966667, + "loss_iou": 0.39453125, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 58398040, + "step": 602 + }, + { + "epoch": 0.05895580758701603, + "grad_norm": 4.728459884896472, + "learning_rate": 5e-05, + "loss": 0.121, + "num_input_tokens_seen": 58495256, + "step": 603 + }, + { + "epoch": 0.05895580758701603, + "loss": 0.15815770626068115, + "loss_ce": 0.004284256137907505, + "loss_iou": 0.328125, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 58495256, + "step": 603 + }, + { + "epoch": 0.059053578412201796, + "grad_norm": 3.8773968993469508, + "learning_rate": 5e-05, + "loss": 0.1361, + "num_input_tokens_seen": 58592296, + "step": 604 + }, + { + "epoch": 0.059053578412201796, + "loss": 0.13374914228916168, + "loss_ce": 0.00697912834584713, + "loss_iou": 0.3828125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 58592296, + "step": 604 + }, + { + "epoch": 0.05915134923738757, + "grad_norm": 6.437587430387649, + "learning_rate": 5e-05, + "loss": 0.1637, + "num_input_tokens_seen": 58690196, + "step": 605 + }, + { + "epoch": 0.05915134923738757, + "loss": 0.15385743975639343, + "loss_ce": 0.004199235700070858, + "loss_iou": 0.4296875, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 58690196, + "step": 605 + }, + { + "epoch": 0.05924912006257333, + "grad_norm": 9.28597298276324, + "learning_rate": 5e-05, + "loss": 0.1517, + "num_input_tokens_seen": 58786884, + "step": 606 + }, + { + "epoch": 0.05924912006257333, + "loss": 0.1680557131767273, + "loss_ce": 0.00533599779009819, + "loss_iou": 0.41796875, + "loss_num": 0.032470703125, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 58786884, + "step": 606 + }, + { + "epoch": 0.059346890887759095, + "grad_norm": 7.0158439952836655, + "learning_rate": 5e-05, + "loss": 0.151, + "num_input_tokens_seen": 58884304, + "step": 607 + }, + { + "epoch": 0.059346890887759095, + "loss": 0.14161868393421173, + "loss_ce": 0.0040912204422056675, + "loss_iou": 0.337890625, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 58884304, + "step": 607 + }, + { + "epoch": 0.05944466171294486, + "grad_norm": 4.068361958780785, + "learning_rate": 5e-05, + "loss": 0.1398, + "num_input_tokens_seen": 58981344, + "step": 608 + }, + { + "epoch": 0.05944466171294486, + "loss": 0.14785102009773254, + "loss_ce": 0.007912660948932171, + "loss_iou": 0.35546875, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 58981344, + "step": 608 + }, + { + "epoch": 0.05954243253813062, + "grad_norm": 15.846095269683667, + "learning_rate": 5e-05, + "loss": 0.146, + "num_input_tokens_seen": 59077692, + "step": 609 + }, + { + "epoch": 0.05954243253813062, + "loss": 0.1324031502008438, + "loss_ce": 0.005938305985182524, + "loss_iou": 0.49609375, + "loss_num": 0.0252685546875, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 59077692, + "step": 609 + }, + { + "epoch": 0.05964020336331639, + "grad_norm": 8.776803650717754, + "learning_rate": 5e-05, + "loss": 0.1981, + "num_input_tokens_seen": 59174244, + "step": 610 + }, + { + "epoch": 0.05964020336331639, + "loss": 0.19909660518169403, + "loss_ce": 0.010497980751097202, + "loss_iou": 0.36328125, + "loss_num": 0.037841796875, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 59174244, + "step": 610 + }, + { + "epoch": 0.05973797418850215, + "grad_norm": 4.900565432298261, + "learning_rate": 5e-05, + "loss": 0.1787, + "num_input_tokens_seen": 59270624, + "step": 611 + }, + { + "epoch": 0.05973797418850215, + "loss": 0.1533440202474594, + "loss_ce": 0.007088529411703348, + "loss_iou": 0.294921875, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 59270624, + "step": 611 + }, + { + "epoch": 0.059835745013687915, + "grad_norm": 5.545066213269146, + "learning_rate": 5e-05, + "loss": 0.1607, + "num_input_tokens_seen": 59367576, + "step": 612 + }, + { + "epoch": 0.059835745013687915, + "loss": 0.17592307925224304, + "loss_ce": 0.008625729009509087, + "loss_iou": 0.337890625, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 59367576, + "step": 612 + }, + { + "epoch": 0.05993351583887368, + "grad_norm": 36.93613051768902, + "learning_rate": 5e-05, + "loss": 0.1555, + "num_input_tokens_seen": 59464700, + "step": 613 + }, + { + "epoch": 0.05993351583887368, + "loss": 0.10106305778026581, + "loss_ce": 0.00801495835185051, + "loss_iou": 0.2734375, + "loss_num": 0.0185546875, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 59464700, + "step": 613 + }, + { + "epoch": 0.06003128666405944, + "grad_norm": 6.125549418476598, + "learning_rate": 5e-05, + "loss": 0.1558, + "num_input_tokens_seen": 59560896, + "step": 614 + }, + { + "epoch": 0.06003128666405944, + "loss": 0.1575726568698883, + "loss_ce": 0.005808750167489052, + "loss_iou": 0.21875, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 59560896, + "step": 614 + }, + { + "epoch": 0.060129057489245206, + "grad_norm": 3.749347104533223, + "learning_rate": 5e-05, + "loss": 0.177, + "num_input_tokens_seen": 59657936, + "step": 615 + }, + { + "epoch": 0.060129057489245206, + "loss": 0.22750957310199738, + "loss_ce": 0.008820627816021442, + "loss_iou": 0.52734375, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 59657936, + "step": 615 + }, + { + "epoch": 0.06022682831443097, + "grad_norm": 14.132740379127704, + "learning_rate": 5e-05, + "loss": 0.1241, + "num_input_tokens_seen": 59756036, + "step": 616 + }, + { + "epoch": 0.06022682831443097, + "loss": 0.11693374067544937, + "loss_ce": 0.005910795647650957, + "loss_iou": 0.392578125, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 59756036, + "step": 616 + }, + { + "epoch": 0.06032459913961674, + "grad_norm": 7.2819264127543955, + "learning_rate": 5e-05, + "loss": 0.1867, + "num_input_tokens_seen": 59852468, + "step": 617 + }, + { + "epoch": 0.06032459913961674, + "loss": 0.16237500309944153, + "loss_ce": 0.006567500531673431, + "loss_iou": 0.26953125, + "loss_num": 0.0311279296875, + "loss_xval": 0.15625, + "num_input_tokens_seen": 59852468, + "step": 617 + }, + { + "epoch": 0.060422369964802505, + "grad_norm": 3.706716467701896, + "learning_rate": 5e-05, + "loss": 0.1104, + "num_input_tokens_seen": 59949260, + "step": 618 + }, + { + "epoch": 0.060422369964802505, + "loss": 0.08806642889976501, + "loss_ce": 0.008461332879960537, + "loss_iou": 0.375, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 59949260, + "step": 618 + }, + { + "epoch": 0.06052014078998827, + "grad_norm": 4.827038747416053, + "learning_rate": 5e-05, + "loss": 0.1123, + "num_input_tokens_seen": 60045996, + "step": 619 + }, + { + "epoch": 0.06052014078998827, + "loss": 0.09072244167327881, + "loss_ce": 0.003838892560452223, + "loss_iou": 0.330078125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 60045996, + "step": 619 + }, + { + "epoch": 0.06061791161517403, + "grad_norm": 6.367757688970512, + "learning_rate": 5e-05, + "loss": 0.1573, + "num_input_tokens_seen": 60141868, + "step": 620 + }, + { + "epoch": 0.06061791161517403, + "loss": 0.11929205060005188, + "loss_ce": 0.0062091597355902195, + "loss_iou": 0.29296875, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 60141868, + "step": 620 + }, + { + "epoch": 0.0607156824403598, + "grad_norm": 4.897038286481201, + "learning_rate": 5e-05, + "loss": 0.1611, + "num_input_tokens_seen": 60238208, + "step": 621 + }, + { + "epoch": 0.0607156824403598, + "loss": 0.19423511624336243, + "loss_ce": 0.008306764997541904, + "loss_iou": 0.275390625, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 60238208, + "step": 621 + }, + { + "epoch": 0.06081345326554556, + "grad_norm": 7.4067824109952065, + "learning_rate": 5e-05, + "loss": 0.1514, + "num_input_tokens_seen": 60334724, + "step": 622 + }, + { + "epoch": 0.06081345326554556, + "loss": 0.17299829423427582, + "loss_ce": 0.004846449010074139, + "loss_iou": 0.396484375, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 60334724, + "step": 622 + }, + { + "epoch": 0.060911224090731325, + "grad_norm": 7.668890261612035, + "learning_rate": 5e-05, + "loss": 0.1687, + "num_input_tokens_seen": 60432276, + "step": 623 + }, + { + "epoch": 0.060911224090731325, + "loss": 0.13603761792182922, + "loss_ce": 0.007253436371684074, + "loss_iou": 0.455078125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 60432276, + "step": 623 + }, + { + "epoch": 0.06100899491591709, + "grad_norm": 5.574663849727994, + "learning_rate": 5e-05, + "loss": 0.1394, + "num_input_tokens_seen": 60529912, + "step": 624 + }, + { + "epoch": 0.06100899491591709, + "loss": 0.10524724423885345, + "loss_ce": 0.0036237146705389023, + "loss_iou": 0.462890625, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 60529912, + "step": 624 + }, + { + "epoch": 0.06110676574110285, + "grad_norm": 9.703071325769988, + "learning_rate": 5e-05, + "loss": 0.1742, + "num_input_tokens_seen": 60627500, + "step": 625 + }, + { + "epoch": 0.06110676574110285, + "loss": 0.13344891369342804, + "loss_ce": 0.005000432953238487, + "loss_iou": 0.484375, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 60627500, + "step": 625 + }, + { + "epoch": 0.06120453656628862, + "grad_norm": 4.476580801609454, + "learning_rate": 5e-05, + "loss": 0.1794, + "num_input_tokens_seen": 60724080, + "step": 626 + }, + { + "epoch": 0.06120453656628862, + "loss": 0.17805010080337524, + "loss_ce": 0.006114063784480095, + "loss_iou": 0.265625, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 60724080, + "step": 626 + }, + { + "epoch": 0.06130230739147438, + "grad_norm": 3.0053303082453082, + "learning_rate": 5e-05, + "loss": 0.1088, + "num_input_tokens_seen": 60821760, + "step": 627 + }, + { + "epoch": 0.06130230739147438, + "loss": 0.10926289856433868, + "loss_ce": 0.0016731808427721262, + "loss_iou": 0.35546875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 60821760, + "step": 627 + }, + { + "epoch": 0.06140007821666015, + "grad_norm": 7.131606184868625, + "learning_rate": 5e-05, + "loss": 0.1565, + "num_input_tokens_seen": 60918288, + "step": 628 + }, + { + "epoch": 0.06140007821666015, + "loss": 0.17947280406951904, + "loss_ce": 0.007964011281728745, + "loss_iou": 0.45703125, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 60918288, + "step": 628 + }, + { + "epoch": 0.061497849041845916, + "grad_norm": 3.3023842134270267, + "learning_rate": 5e-05, + "loss": 0.1346, + "num_input_tokens_seen": 61014132, + "step": 629 + }, + { + "epoch": 0.061497849041845916, + "loss": 0.08126531541347504, + "loss_ce": 0.004788260441273451, + "loss_iou": 0.31640625, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 61014132, + "step": 629 + }, + { + "epoch": 0.06159561986703168, + "grad_norm": 8.289008992741586, + "learning_rate": 5e-05, + "loss": 0.1263, + "num_input_tokens_seen": 61110576, + "step": 630 + }, + { + "epoch": 0.06159561986703168, + "loss": 0.11630038172006607, + "loss_ce": 0.004422937054187059, + "loss_iou": 0.431640625, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 61110576, + "step": 630 + }, + { + "epoch": 0.061693390692217444, + "grad_norm": 5.704372194973768, + "learning_rate": 5e-05, + "loss": 0.149, + "num_input_tokens_seen": 61207640, + "step": 631 + }, + { + "epoch": 0.061693390692217444, + "loss": 0.1347738653421402, + "loss_ce": 0.005531913135200739, + "loss_iou": 0.4453125, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 61207640, + "step": 631 + }, + { + "epoch": 0.06179116151740321, + "grad_norm": 4.418843324480752, + "learning_rate": 5e-05, + "loss": 0.1495, + "num_input_tokens_seen": 61304916, + "step": 632 + }, + { + "epoch": 0.06179116151740321, + "loss": 0.13554559648036957, + "loss_ce": 0.009355412796139717, + "loss_iou": 0.484375, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 61304916, + "step": 632 + }, + { + "epoch": 0.06188893234258897, + "grad_norm": 6.5183037063138745, + "learning_rate": 5e-05, + "loss": 0.1989, + "num_input_tokens_seen": 61401128, + "step": 633 + }, + { + "epoch": 0.06188893234258897, + "loss": 0.16972962021827698, + "loss_ce": 0.0058502270840108395, + "loss_iou": 0.333984375, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 61401128, + "step": 633 + }, + { + "epoch": 0.061986703167774736, + "grad_norm": 5.788572617052485, + "learning_rate": 5e-05, + "loss": 0.1565, + "num_input_tokens_seen": 61497424, + "step": 634 + }, + { + "epoch": 0.061986703167774736, + "loss": 0.1593788117170334, + "loss_ce": 0.006211080122739077, + "loss_iou": 0.388671875, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 61497424, + "step": 634 + }, + { + "epoch": 0.0620844739929605, + "grad_norm": 5.907149633884196, + "learning_rate": 5e-05, + "loss": 0.1903, + "num_input_tokens_seen": 61594128, + "step": 635 + }, + { + "epoch": 0.0620844739929605, + "loss": 0.14409002661705017, + "loss_ce": 0.004197455011308193, + "loss_iou": 0.41015625, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 61594128, + "step": 635 + }, + { + "epoch": 0.062182244818146264, + "grad_norm": 24.040802888243864, + "learning_rate": 5e-05, + "loss": 0.1703, + "num_input_tokens_seen": 61691080, + "step": 636 + }, + { + "epoch": 0.062182244818146264, + "loss": 0.204533189535141, + "loss_ce": 0.006809801794588566, + "loss_iou": 0.3515625, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 61691080, + "step": 636 + }, + { + "epoch": 0.06228001564333203, + "grad_norm": 7.487031939065853, + "learning_rate": 5e-05, + "loss": 0.1934, + "num_input_tokens_seen": 61787552, + "step": 637 + }, + { + "epoch": 0.06228001564333203, + "loss": 0.17979121208190918, + "loss_ce": 0.006939643528312445, + "loss_iou": 0.50390625, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 61787552, + "step": 637 + }, + { + "epoch": 0.06237778646851779, + "grad_norm": 7.390572487011977, + "learning_rate": 5e-05, + "loss": 0.1555, + "num_input_tokens_seen": 61885288, + "step": 638 + }, + { + "epoch": 0.06237778646851779, + "loss": 0.15457558631896973, + "loss_ce": 0.014377824030816555, + "loss_iou": 0.52734375, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 61885288, + "step": 638 + }, + { + "epoch": 0.062475557293703556, + "grad_norm": 5.100914816243149, + "learning_rate": 5e-05, + "loss": 0.1004, + "num_input_tokens_seen": 61982384, + "step": 639 + }, + { + "epoch": 0.062475557293703556, + "loss": 0.09373991191387177, + "loss_ce": 0.006368082948029041, + "loss_iou": 0.388671875, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 61982384, + "step": 639 + }, + { + "epoch": 0.06257332811888933, + "grad_norm": 8.192636792091857, + "learning_rate": 5e-05, + "loss": 0.1371, + "num_input_tokens_seen": 62080244, + "step": 640 + }, + { + "epoch": 0.06257332811888933, + "loss": 0.14677082002162933, + "loss_ce": 0.007091868203133345, + "loss_iou": 0.384765625, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 62080244, + "step": 640 + }, + { + "epoch": 0.06267109894407509, + "grad_norm": 12.442594762848438, + "learning_rate": 5e-05, + "loss": 0.1545, + "num_input_tokens_seen": 62176960, + "step": 641 + }, + { + "epoch": 0.06267109894407509, + "loss": 0.1964971423149109, + "loss_ce": 0.006555742584168911, + "loss_iou": 0.44921875, + "loss_num": 0.0380859375, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 62176960, + "step": 641 + }, + { + "epoch": 0.06276886976926085, + "grad_norm": 6.943224462615798, + "learning_rate": 5e-05, + "loss": 0.159, + "num_input_tokens_seen": 62273592, + "step": 642 + }, + { + "epoch": 0.06276886976926085, + "loss": 0.1619463711977005, + "loss_ce": 0.008168306201696396, + "loss_iou": 0.275390625, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 62273592, + "step": 642 + }, + { + "epoch": 0.06286664059444662, + "grad_norm": 5.990979059689121, + "learning_rate": 5e-05, + "loss": 0.1649, + "num_input_tokens_seen": 62370644, + "step": 643 + }, + { + "epoch": 0.06286664059444662, + "loss": 0.1667390912771225, + "loss_ce": 0.00932941772043705, + "loss_iou": 0.388671875, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 62370644, + "step": 643 + }, + { + "epoch": 0.06296441141963238, + "grad_norm": 4.917518434779369, + "learning_rate": 5e-05, + "loss": 0.1329, + "num_input_tokens_seen": 62467888, + "step": 644 + }, + { + "epoch": 0.06296441141963238, + "loss": 0.10227763652801514, + "loss_ce": 0.005628470331430435, + "loss_iou": 0.384765625, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 62467888, + "step": 644 + }, + { + "epoch": 0.06306218224481815, + "grad_norm": 4.370678986163892, + "learning_rate": 5e-05, + "loss": 0.1396, + "num_input_tokens_seen": 62564844, + "step": 645 + }, + { + "epoch": 0.06306218224481815, + "loss": 0.14844931662082672, + "loss_ce": 0.004650484770536423, + "loss_iou": 0.41796875, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 62564844, + "step": 645 + }, + { + "epoch": 0.06315995307000391, + "grad_norm": 7.520777515930592, + "learning_rate": 5e-05, + "loss": 0.144, + "num_input_tokens_seen": 62661628, + "step": 646 + }, + { + "epoch": 0.06315995307000391, + "loss": 0.1592249572277069, + "loss_ce": 0.016372166574001312, + "loss_iou": 0.384765625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 62661628, + "step": 646 + }, + { + "epoch": 0.06325772389518967, + "grad_norm": 6.792314087531902, + "learning_rate": 5e-05, + "loss": 0.1102, + "num_input_tokens_seen": 62758096, + "step": 647 + }, + { + "epoch": 0.06325772389518967, + "loss": 0.09598828852176666, + "loss_ce": 0.01328565739095211, + "loss_iou": 0.37109375, + "loss_num": 0.0166015625, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 62758096, + "step": 647 + }, + { + "epoch": 0.06335549472037544, + "grad_norm": 6.5747939799460955, + "learning_rate": 5e-05, + "loss": 0.1373, + "num_input_tokens_seen": 62855168, + "step": 648 + }, + { + "epoch": 0.06335549472037544, + "loss": 0.13522833585739136, + "loss_ce": 0.0026599825359880924, + "loss_iou": 0.423828125, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 62855168, + "step": 648 + }, + { + "epoch": 0.0634532655455612, + "grad_norm": 15.185757373826302, + "learning_rate": 5e-05, + "loss": 0.2084, + "num_input_tokens_seen": 62951800, + "step": 649 + }, + { + "epoch": 0.0634532655455612, + "loss": 0.27429622411727905, + "loss_ce": 0.008793305605649948, + "loss_iou": 0.35546875, + "loss_num": 0.05322265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 62951800, + "step": 649 + }, + { + "epoch": 0.06355103637074697, + "grad_norm": 8.767402602199967, + "learning_rate": 5e-05, + "loss": 0.1261, + "num_input_tokens_seen": 63048684, + "step": 650 + }, + { + "epoch": 0.06355103637074697, + "loss": 0.12555190920829773, + "loss_ce": 0.009066306985914707, + "loss_iou": 0.41796875, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 63048684, + "step": 650 + }, + { + "epoch": 0.06364880719593273, + "grad_norm": 8.858185546146176, + "learning_rate": 5e-05, + "loss": 0.1264, + "num_input_tokens_seen": 63145944, + "step": 651 + }, + { + "epoch": 0.06364880719593273, + "loss": 0.14957046508789062, + "loss_ce": 0.0030860831029713154, + "loss_iou": 0.578125, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 63145944, + "step": 651 + }, + { + "epoch": 0.0637465780211185, + "grad_norm": 12.382669120403458, + "learning_rate": 5e-05, + "loss": 0.1243, + "num_input_tokens_seen": 63242680, + "step": 652 + }, + { + "epoch": 0.0637465780211185, + "loss": 0.10226811468601227, + "loss_ce": 0.00424565002322197, + "loss_iou": 0.419921875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 63242680, + "step": 652 + }, + { + "epoch": 0.06384434884630426, + "grad_norm": 5.120320657938447, + "learning_rate": 5e-05, + "loss": 0.1753, + "num_input_tokens_seen": 63338724, + "step": 653 + }, + { + "epoch": 0.06384434884630426, + "loss": 0.12462548911571503, + "loss_ce": 0.006140993908047676, + "loss_iou": 0.474609375, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 63338724, + "step": 653 + }, + { + "epoch": 0.06394211967149002, + "grad_norm": 4.164696314464029, + "learning_rate": 5e-05, + "loss": 0.1102, + "num_input_tokens_seen": 63436008, + "step": 654 + }, + { + "epoch": 0.06394211967149002, + "loss": 0.09477762132883072, + "loss_ce": 0.008641758002340794, + "loss_iou": 0.330078125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 63436008, + "step": 654 + }, + { + "epoch": 0.06403989049667579, + "grad_norm": 10.965696384814532, + "learning_rate": 5e-05, + "loss": 0.1357, + "num_input_tokens_seen": 63532872, + "step": 655 + }, + { + "epoch": 0.06403989049667579, + "loss": 0.1406618356704712, + "loss_ce": 0.008978484198451042, + "loss_iou": 0.306640625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 63532872, + "step": 655 + }, + { + "epoch": 0.06413766132186155, + "grad_norm": 36.17044601918213, + "learning_rate": 5e-05, + "loss": 0.1153, + "num_input_tokens_seen": 63629080, + "step": 656 + }, + { + "epoch": 0.06413766132186155, + "loss": 0.1626206636428833, + "loss_ce": 0.009895455092191696, + "loss_iou": 0.3125, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 63629080, + "step": 656 + }, + { + "epoch": 0.06423543214704733, + "grad_norm": 6.141028274639202, + "learning_rate": 5e-05, + "loss": 0.1645, + "num_input_tokens_seen": 63726356, + "step": 657 + }, + { + "epoch": 0.06423543214704733, + "loss": 0.1505054235458374, + "loss_ce": 0.01058232318609953, + "loss_iou": 0.3828125, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 63726356, + "step": 657 + }, + { + "epoch": 0.06433320297223309, + "grad_norm": 8.031925212289597, + "learning_rate": 5e-05, + "loss": 0.185, + "num_input_tokens_seen": 63823540, + "step": 658 + }, + { + "epoch": 0.06433320297223309, + "loss": 0.15723514556884766, + "loss_ce": 0.0037622498348355293, + "loss_iou": 0.4453125, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 63823540, + "step": 658 + }, + { + "epoch": 0.06443097379741886, + "grad_norm": 7.355383857305281, + "learning_rate": 5e-05, + "loss": 0.1938, + "num_input_tokens_seen": 63920320, + "step": 659 + }, + { + "epoch": 0.06443097379741886, + "loss": 0.17393091320991516, + "loss_ce": 0.006999771110713482, + "loss_iou": 0.3515625, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 63920320, + "step": 659 + }, + { + "epoch": 0.06452874462260462, + "grad_norm": 3.7833965964585587, + "learning_rate": 5e-05, + "loss": 0.1143, + "num_input_tokens_seen": 64017352, + "step": 660 + }, + { + "epoch": 0.06452874462260462, + "loss": 0.12919294834136963, + "loss_ce": 0.009289379231631756, + "loss_iou": 0.28515625, + "loss_num": 0.02392578125, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 64017352, + "step": 660 + }, + { + "epoch": 0.06462651544779038, + "grad_norm": 4.895853768264345, + "learning_rate": 5e-05, + "loss": 0.1551, + "num_input_tokens_seen": 64114120, + "step": 661 + }, + { + "epoch": 0.06462651544779038, + "loss": 0.13116991519927979, + "loss_ce": 0.007512696087360382, + "loss_iou": 0.33984375, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 64114120, + "step": 661 + }, + { + "epoch": 0.06472428627297615, + "grad_norm": 6.112049595713775, + "learning_rate": 5e-05, + "loss": 0.1438, + "num_input_tokens_seen": 64210756, + "step": 662 + }, + { + "epoch": 0.06472428627297615, + "loss": 0.13368190824985504, + "loss_ce": 0.004165307153016329, + "loss_iou": 0.404296875, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 64210756, + "step": 662 + }, + { + "epoch": 0.06482205709816191, + "grad_norm": 4.8781800234905806, + "learning_rate": 5e-05, + "loss": 0.1534, + "num_input_tokens_seen": 64308160, + "step": 663 + }, + { + "epoch": 0.06482205709816191, + "loss": 0.1773533970117569, + "loss_ce": 0.0028691496700048447, + "loss_iou": 0.40625, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 64308160, + "step": 663 + }, + { + "epoch": 0.06491982792334768, + "grad_norm": 5.353939004819072, + "learning_rate": 5e-05, + "loss": 0.1326, + "num_input_tokens_seen": 64405356, + "step": 664 + }, + { + "epoch": 0.06491982792334768, + "loss": 0.1741141825914383, + "loss_ce": 0.006877851206809282, + "loss_iou": 0.392578125, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 64405356, + "step": 664 + }, + { + "epoch": 0.06501759874853344, + "grad_norm": 6.904993571407262, + "learning_rate": 5e-05, + "loss": 0.1368, + "num_input_tokens_seen": 64501944, + "step": 665 + }, + { + "epoch": 0.06501759874853344, + "loss": 0.12895020842552185, + "loss_ce": 0.00510987127199769, + "loss_iou": 0.404296875, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 64501944, + "step": 665 + }, + { + "epoch": 0.0651153695737192, + "grad_norm": 10.401822534972354, + "learning_rate": 5e-05, + "loss": 0.1694, + "num_input_tokens_seen": 64599136, + "step": 666 + }, + { + "epoch": 0.0651153695737192, + "loss": 0.176805317401886, + "loss_ce": 0.005845830775797367, + "loss_iou": 0.470703125, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 64599136, + "step": 666 + }, + { + "epoch": 0.06521314039890497, + "grad_norm": 6.3970195249782025, + "learning_rate": 5e-05, + "loss": 0.2269, + "num_input_tokens_seen": 64696036, + "step": 667 + }, + { + "epoch": 0.06521314039890497, + "loss": 0.13692891597747803, + "loss_ce": 0.005215053912252188, + "loss_iou": 0.41796875, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 64696036, + "step": 667 + }, + { + "epoch": 0.06531091122409073, + "grad_norm": 5.972067820832822, + "learning_rate": 5e-05, + "loss": 0.1241, + "num_input_tokens_seen": 64793304, + "step": 668 + }, + { + "epoch": 0.06531091122409073, + "loss": 0.116127148270607, + "loss_ce": 0.004127629566937685, + "loss_iou": 0.50390625, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 64793304, + "step": 668 + }, + { + "epoch": 0.0654086820492765, + "grad_norm": 2.96026011723801, + "learning_rate": 5e-05, + "loss": 0.1479, + "num_input_tokens_seen": 64890120, + "step": 669 + }, + { + "epoch": 0.0654086820492765, + "loss": 0.16824519634246826, + "loss_ce": 0.007966878823935986, + "loss_iou": 0.349609375, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 64890120, + "step": 669 + }, + { + "epoch": 0.06550645287446226, + "grad_norm": 6.701145006605729, + "learning_rate": 5e-05, + "loss": 0.1117, + "num_input_tokens_seen": 64986944, + "step": 670 + }, + { + "epoch": 0.06550645287446226, + "loss": 0.1379653364419937, + "loss_ce": 0.004908696748316288, + "loss_iou": 0.58203125, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 64986944, + "step": 670 + }, + { + "epoch": 0.06560422369964802, + "grad_norm": 10.708593333970933, + "learning_rate": 5e-05, + "loss": 0.1319, + "num_input_tokens_seen": 65084196, + "step": 671 + }, + { + "epoch": 0.06560422369964802, + "loss": 0.13014338910579681, + "loss_ce": 0.010758621618151665, + "loss_iou": 0.404296875, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 65084196, + "step": 671 + }, + { + "epoch": 0.06570199452483379, + "grad_norm": 10.058488873236653, + "learning_rate": 5e-05, + "loss": 0.1169, + "num_input_tokens_seen": 65180780, + "step": 672 + }, + { + "epoch": 0.06570199452483379, + "loss": 0.12395782768726349, + "loss_ce": 0.008601376786828041, + "loss_iou": 0.40625, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 65180780, + "step": 672 + }, + { + "epoch": 0.06579976535001955, + "grad_norm": 12.353383140933776, + "learning_rate": 5e-05, + "loss": 0.1851, + "num_input_tokens_seen": 65278456, + "step": 673 + }, + { + "epoch": 0.06579976535001955, + "loss": 0.17981043457984924, + "loss_ce": 0.004670043010264635, + "loss_iou": 0.2890625, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 65278456, + "step": 673 + }, + { + "epoch": 0.06589753617520532, + "grad_norm": 7.965183943079611, + "learning_rate": 5e-05, + "loss": 0.1414, + "num_input_tokens_seen": 65375840, + "step": 674 + }, + { + "epoch": 0.06589753617520532, + "loss": 0.1426188349723816, + "loss_ce": 0.005518611054867506, + "loss_iou": 0.423828125, + "loss_num": 0.0274658203125, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 65375840, + "step": 674 + }, + { + "epoch": 0.06599530700039108, + "grad_norm": 21.02147729446519, + "learning_rate": 5e-05, + "loss": 0.152, + "num_input_tokens_seen": 65473132, + "step": 675 + }, + { + "epoch": 0.06599530700039108, + "loss": 0.1411101222038269, + "loss_ce": 0.016110118478536606, + "loss_iou": 0.470703125, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 65473132, + "step": 675 + }, + { + "epoch": 0.06609307782557684, + "grad_norm": 6.8821079224476875, + "learning_rate": 5e-05, + "loss": 0.1006, + "num_input_tokens_seen": 65571252, + "step": 676 + }, + { + "epoch": 0.06609307782557684, + "loss": 0.11797930300235748, + "loss_ce": 0.006422299891710281, + "loss_iou": 0.44140625, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 65571252, + "step": 676 + }, + { + "epoch": 0.06619084865076261, + "grad_norm": 3.682797973488144, + "learning_rate": 5e-05, + "loss": 0.1251, + "num_input_tokens_seen": 65668264, + "step": 677 + }, + { + "epoch": 0.06619084865076261, + "loss": 0.08654282987117767, + "loss_ce": 0.0034816069528460503, + "loss_iou": 0.33203125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 65668264, + "step": 677 + }, + { + "epoch": 0.06628861947594837, + "grad_norm": 8.803922797179602, + "learning_rate": 5e-05, + "loss": 0.1302, + "num_input_tokens_seen": 65765004, + "step": 678 + }, + { + "epoch": 0.06628861947594837, + "loss": 0.1439475119113922, + "loss_ce": 0.005077265202999115, + "loss_iou": 0.482421875, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 65765004, + "step": 678 + }, + { + "epoch": 0.06638639030113413, + "grad_norm": 5.03589664617508, + "learning_rate": 5e-05, + "loss": 0.1446, + "num_input_tokens_seen": 65861492, + "step": 679 + }, + { + "epoch": 0.06638639030113413, + "loss": 0.1831599473953247, + "loss_ce": 0.01882278360426426, + "loss_iou": 0.330078125, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 65861492, + "step": 679 + }, + { + "epoch": 0.06648416112631991, + "grad_norm": 4.969596656971702, + "learning_rate": 5e-05, + "loss": 0.1133, + "num_input_tokens_seen": 65957868, + "step": 680 + }, + { + "epoch": 0.06648416112631991, + "loss": 0.14942242205142975, + "loss_ce": 0.008187070488929749, + "loss_iou": 0.203125, + "loss_num": 0.0281982421875, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 65957868, + "step": 680 + }, + { + "epoch": 0.06658193195150568, + "grad_norm": 4.030016977151595, + "learning_rate": 5e-05, + "loss": 0.136, + "num_input_tokens_seen": 66054596, + "step": 681 + }, + { + "epoch": 0.06658193195150568, + "loss": 0.09421020746231079, + "loss_ce": 0.003767552552744746, + "loss_iou": 0.26171875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 66054596, + "step": 681 + }, + { + "epoch": 0.06667970277669144, + "grad_norm": 2.867883126934355, + "learning_rate": 5e-05, + "loss": 0.1363, + "num_input_tokens_seen": 66150264, + "step": 682 + }, + { + "epoch": 0.06667970277669144, + "loss": 0.15606018900871277, + "loss_ce": 0.004952393937855959, + "loss_iou": 0.27734375, + "loss_num": 0.0301513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 66150264, + "step": 682 + }, + { + "epoch": 0.0667774736018772, + "grad_norm": 8.394829866129266, + "learning_rate": 5e-05, + "loss": 0.1171, + "num_input_tokens_seen": 66247000, + "step": 683 + }, + { + "epoch": 0.0667774736018772, + "loss": 0.12696686387062073, + "loss_ce": 0.011763003654778004, + "loss_iou": 0.28125, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 66247000, + "step": 683 + }, + { + "epoch": 0.06687524442706297, + "grad_norm": 6.419757255608237, + "learning_rate": 5e-05, + "loss": 0.1369, + "num_input_tokens_seen": 66343988, + "step": 684 + }, + { + "epoch": 0.06687524442706297, + "loss": 0.12301043421030045, + "loss_ce": 0.00934771727770567, + "loss_iou": 0.3828125, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 66343988, + "step": 684 + }, + { + "epoch": 0.06697301525224873, + "grad_norm": 6.686966271501538, + "learning_rate": 5e-05, + "loss": 0.1742, + "num_input_tokens_seen": 66440900, + "step": 685 + }, + { + "epoch": 0.06697301525224873, + "loss": 0.16509884595870972, + "loss_ce": 0.005003638099879026, + "loss_iou": 0.365234375, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 66440900, + "step": 685 + }, + { + "epoch": 0.0670707860774345, + "grad_norm": 5.362313791117553, + "learning_rate": 5e-05, + "loss": 0.1658, + "num_input_tokens_seen": 66538428, + "step": 686 + }, + { + "epoch": 0.0670707860774345, + "loss": 0.12198201566934586, + "loss_ce": 0.00516072241589427, + "loss_iou": 0.365234375, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 66538428, + "step": 686 + }, + { + "epoch": 0.06716855690262026, + "grad_norm": 5.088338534599709, + "learning_rate": 5e-05, + "loss": 0.0905, + "num_input_tokens_seen": 66635712, + "step": 687 + }, + { + "epoch": 0.06716855690262026, + "loss": 0.08431458473205566, + "loss_ce": 0.004205941688269377, + "loss_iou": 0.314453125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 66635712, + "step": 687 + }, + { + "epoch": 0.06726632772780602, + "grad_norm": 8.656675336870068, + "learning_rate": 5e-05, + "loss": 0.1345, + "num_input_tokens_seen": 66732364, + "step": 688 + }, + { + "epoch": 0.06726632772780602, + "loss": 0.10629569739103317, + "loss_ce": 0.00876915268599987, + "loss_iou": 0.4296875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 66732364, + "step": 688 + }, + { + "epoch": 0.06736409855299179, + "grad_norm": 7.483119297801607, + "learning_rate": 5e-05, + "loss": 0.1441, + "num_input_tokens_seen": 66829176, + "step": 689 + }, + { + "epoch": 0.06736409855299179, + "loss": 0.1250976026058197, + "loss_ce": 0.0028441823087632656, + "loss_iou": 0.42578125, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 66829176, + "step": 689 + }, + { + "epoch": 0.06746186937817755, + "grad_norm": 8.399557303749718, + "learning_rate": 5e-05, + "loss": 0.1433, + "num_input_tokens_seen": 66925184, + "step": 690 + }, + { + "epoch": 0.06746186937817755, + "loss": 0.147377148270607, + "loss_ce": 0.010627885349094868, + "loss_iou": 0.267578125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 66925184, + "step": 690 + }, + { + "epoch": 0.06755964020336332, + "grad_norm": 3.7831445208801115, + "learning_rate": 5e-05, + "loss": 0.1661, + "num_input_tokens_seen": 67022340, + "step": 691 + }, + { + "epoch": 0.06755964020336332, + "loss": 0.20133233070373535, + "loss_ce": 0.00376153364777565, + "loss_iou": 0.46484375, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 67022340, + "step": 691 + }, + { + "epoch": 0.06765741102854908, + "grad_norm": 5.175888185445865, + "learning_rate": 5e-05, + "loss": 0.1513, + "num_input_tokens_seen": 67118072, + "step": 692 + }, + { + "epoch": 0.06765741102854908, + "loss": 0.18364007771015167, + "loss_ce": 0.010178161785006523, + "loss_iou": 0.33984375, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 67118072, + "step": 692 + }, + { + "epoch": 0.06775518185373484, + "grad_norm": 14.18970077461128, + "learning_rate": 5e-05, + "loss": 0.1619, + "num_input_tokens_seen": 67216024, + "step": 693 + }, + { + "epoch": 0.06775518185373484, + "loss": 0.2031756043434143, + "loss_ce": 0.0052233245223760605, + "loss_iou": 0.333984375, + "loss_num": 0.03955078125, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 67216024, + "step": 693 + }, + { + "epoch": 0.06785295267892061, + "grad_norm": 3.158427226912595, + "learning_rate": 5e-05, + "loss": 0.1318, + "num_input_tokens_seen": 67313968, + "step": 694 + }, + { + "epoch": 0.06785295267892061, + "loss": 0.13199275732040405, + "loss_ce": 0.004002021625638008, + "loss_iou": 0.466796875, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 67313968, + "step": 694 + }, + { + "epoch": 0.06795072350410637, + "grad_norm": 7.684991123830347, + "learning_rate": 5e-05, + "loss": 0.1781, + "num_input_tokens_seen": 67410908, + "step": 695 + }, + { + "epoch": 0.06795072350410637, + "loss": 0.2037700116634369, + "loss_ce": 0.010654782876372337, + "loss_iou": 0.40234375, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 67410908, + "step": 695 + }, + { + "epoch": 0.06804849432929214, + "grad_norm": 4.222828938817524, + "learning_rate": 5e-05, + "loss": 0.1361, + "num_input_tokens_seen": 67507068, + "step": 696 + }, + { + "epoch": 0.06804849432929214, + "loss": 0.185989111661911, + "loss_ce": 0.009673791006207466, + "loss_iou": 0.2431640625, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 67507068, + "step": 696 + }, + { + "epoch": 0.0681462651544779, + "grad_norm": 13.910204696431038, + "learning_rate": 5e-05, + "loss": 0.1152, + "num_input_tokens_seen": 67603100, + "step": 697 + }, + { + "epoch": 0.0681462651544779, + "loss": 0.09433898329734802, + "loss_ce": 0.00827941857278347, + "loss_iou": 0.314453125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 67603100, + "step": 697 + }, + { + "epoch": 0.06824403597966366, + "grad_norm": 9.714228087528694, + "learning_rate": 5e-05, + "loss": 0.1333, + "num_input_tokens_seen": 67700608, + "step": 698 + }, + { + "epoch": 0.06824403597966366, + "loss": 0.12983138859272003, + "loss_ce": 0.010599215514957905, + "loss_iou": 0.4375, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 67700608, + "step": 698 + }, + { + "epoch": 0.06834180680484943, + "grad_norm": 5.410252158836677, + "learning_rate": 5e-05, + "loss": 0.1316, + "num_input_tokens_seen": 67798108, + "step": 699 + }, + { + "epoch": 0.06834180680484943, + "loss": 0.11308663338422775, + "loss_ce": 0.005725793540477753, + "loss_iou": 0.466796875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 67798108, + "step": 699 + }, + { + "epoch": 0.06843957763003519, + "grad_norm": 8.675675653775409, + "learning_rate": 5e-05, + "loss": 0.1449, + "num_input_tokens_seen": 67894024, + "step": 700 + }, + { + "epoch": 0.06843957763003519, + "loss": 0.1607748568058014, + "loss_ce": 0.004707975313067436, + "loss_iou": 0.48046875, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 67894024, + "step": 700 + }, + { + "epoch": 0.06853734845522096, + "grad_norm": 11.121982693337543, + "learning_rate": 5e-05, + "loss": 0.1307, + "num_input_tokens_seen": 67991404, + "step": 701 + }, + { + "epoch": 0.06853734845522096, + "loss": 0.11980661749839783, + "loss_ce": 0.008661597967147827, + "loss_iou": 0.44921875, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 67991404, + "step": 701 + }, + { + "epoch": 0.06863511928040672, + "grad_norm": 3.180336331806758, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 68087080, + "step": 702 + }, + { + "epoch": 0.06863511928040672, + "loss": 0.07624297589063644, + "loss_ce": 0.008783869445323944, + "loss_iou": 0.32421875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 68087080, + "step": 702 + }, + { + "epoch": 0.0687328901055925, + "grad_norm": 5.4155681488346765, + "learning_rate": 5e-05, + "loss": 0.1008, + "num_input_tokens_seen": 68184176, + "step": 703 + }, + { + "epoch": 0.0687328901055925, + "loss": 0.08035436272621155, + "loss_ce": 0.0026871212758123875, + "loss_iou": 0.30859375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 68184176, + "step": 703 + }, + { + "epoch": 0.06883066093077826, + "grad_norm": 9.908650568067749, + "learning_rate": 5e-05, + "loss": 0.1421, + "num_input_tokens_seen": 68280684, + "step": 704 + }, + { + "epoch": 0.06883066093077826, + "loss": 0.13124023377895355, + "loss_ce": 0.008162845857441425, + "loss_iou": 0.357421875, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 68280684, + "step": 704 + }, + { + "epoch": 0.06892843175596403, + "grad_norm": 22.945936256361474, + "learning_rate": 5e-05, + "loss": 0.0993, + "num_input_tokens_seen": 68378088, + "step": 705 + }, + { + "epoch": 0.06892843175596403, + "loss": 0.0726035013794899, + "loss_ce": 0.007204330991953611, + "loss_iou": 0.451171875, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 68378088, + "step": 705 + }, + { + "epoch": 0.06902620258114979, + "grad_norm": 4.661862476847554, + "learning_rate": 5e-05, + "loss": 0.1343, + "num_input_tokens_seen": 68474592, + "step": 706 + }, + { + "epoch": 0.06902620258114979, + "loss": 0.1382351666688919, + "loss_ce": 0.009359435178339481, + "loss_iou": 0.40625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 68474592, + "step": 706 + }, + { + "epoch": 0.06912397340633555, + "grad_norm": 2.8067534111709054, + "learning_rate": 5e-05, + "loss": 0.1521, + "num_input_tokens_seen": 68571128, + "step": 707 + }, + { + "epoch": 0.06912397340633555, + "loss": 0.13882341980934143, + "loss_ce": 0.01144303660839796, + "loss_iou": 0.4296875, + "loss_num": 0.0255126953125, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 68571128, + "step": 707 + }, + { + "epoch": 0.06922174423152132, + "grad_norm": 13.423203618119992, + "learning_rate": 5e-05, + "loss": 0.1284, + "num_input_tokens_seen": 68669708, + "step": 708 + }, + { + "epoch": 0.06922174423152132, + "loss": 0.10945300757884979, + "loss_ce": 0.005235475953668356, + "loss_iou": 0.439453125, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 68669708, + "step": 708 + }, + { + "epoch": 0.06931951505670708, + "grad_norm": 15.955629706215083, + "learning_rate": 5e-05, + "loss": 0.1557, + "num_input_tokens_seen": 68768236, + "step": 709 + }, + { + "epoch": 0.06931951505670708, + "loss": 0.1539686620235443, + "loss_ce": 0.005409084726125002, + "loss_iou": 0.435546875, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 68768236, + "step": 709 + }, + { + "epoch": 0.06941728588189285, + "grad_norm": 6.532615614270428, + "learning_rate": 5e-05, + "loss": 0.1932, + "num_input_tokens_seen": 68864032, + "step": 710 + }, + { + "epoch": 0.06941728588189285, + "loss": 0.22441810369491577, + "loss_ce": 0.007255030330270529, + "loss_iou": 0.298828125, + "loss_num": 0.04345703125, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 68864032, + "step": 710 + }, + { + "epoch": 0.06951505670707861, + "grad_norm": 4.478640335418371, + "learning_rate": 5e-05, + "loss": 0.1457, + "num_input_tokens_seen": 68960692, + "step": 711 + }, + { + "epoch": 0.06951505670707861, + "loss": 0.1405259668827057, + "loss_ce": 0.00342574343085289, + "loss_iou": 0.384765625, + "loss_num": 0.0274658203125, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 68960692, + "step": 711 + }, + { + "epoch": 0.06961282753226437, + "grad_norm": 8.947377332732081, + "learning_rate": 5e-05, + "loss": 0.179, + "num_input_tokens_seen": 69057092, + "step": 712 + }, + { + "epoch": 0.06961282753226437, + "loss": 0.23817500472068787, + "loss_ce": 0.007217965088784695, + "loss_iou": 0.44140625, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 69057092, + "step": 712 + }, + { + "epoch": 0.06971059835745014, + "grad_norm": 9.983690249771778, + "learning_rate": 5e-05, + "loss": 0.1435, + "num_input_tokens_seen": 69153328, + "step": 713 + }, + { + "epoch": 0.06971059835745014, + "loss": 0.1618589609861374, + "loss_ce": 0.003655842272564769, + "loss_iou": 0.361328125, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 69153328, + "step": 713 + }, + { + "epoch": 0.0698083691826359, + "grad_norm": 9.025386373166898, + "learning_rate": 5e-05, + "loss": 0.1085, + "num_input_tokens_seen": 69250316, + "step": 714 + }, + { + "epoch": 0.0698083691826359, + "loss": 0.08681613206863403, + "loss_ce": 0.004998499993234873, + "loss_iou": 0.380859375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 69250316, + "step": 714 + }, + { + "epoch": 0.06990614000782167, + "grad_norm": 7.594884231930642, + "learning_rate": 5e-05, + "loss": 0.1595, + "num_input_tokens_seen": 69347148, + "step": 715 + }, + { + "epoch": 0.06990614000782167, + "loss": 0.17783844470977783, + "loss_ce": 0.0026065127458423376, + "loss_iou": 0.494140625, + "loss_num": 0.03515625, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 69347148, + "step": 715 + }, + { + "epoch": 0.07000391083300743, + "grad_norm": 9.728889532037126, + "learning_rate": 5e-05, + "loss": 0.1555, + "num_input_tokens_seen": 69443360, + "step": 716 + }, + { + "epoch": 0.07000391083300743, + "loss": 0.16912393271923065, + "loss_ce": 0.007731716614216566, + "loss_iou": 0.36328125, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 69443360, + "step": 716 + }, + { + "epoch": 0.0701016816581932, + "grad_norm": 12.799734518042712, + "learning_rate": 5e-05, + "loss": 0.1351, + "num_input_tokens_seen": 69539984, + "step": 717 + }, + { + "epoch": 0.0701016816581932, + "loss": 0.15018223226070404, + "loss_ce": 0.007298936136066914, + "loss_iou": 0.423828125, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 69539984, + "step": 717 + }, + { + "epoch": 0.07019945248337896, + "grad_norm": 5.088416173439336, + "learning_rate": 5e-05, + "loss": 0.1358, + "num_input_tokens_seen": 69637836, + "step": 718 + }, + { + "epoch": 0.07019945248337896, + "loss": 0.1629677414894104, + "loss_ce": 0.006839822046458721, + "loss_iou": 0.4296875, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 69637836, + "step": 718 + }, + { + "epoch": 0.07029722330856472, + "grad_norm": 13.715428428119692, + "learning_rate": 5e-05, + "loss": 0.1166, + "num_input_tokens_seen": 69734864, + "step": 719 + }, + { + "epoch": 0.07029722330856472, + "loss": 0.11197197437286377, + "loss_ce": 0.004206774290651083, + "loss_iou": 0.384765625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 69734864, + "step": 719 + }, + { + "epoch": 0.07039499413375049, + "grad_norm": 4.538886132942736, + "learning_rate": 5e-05, + "loss": 0.1212, + "num_input_tokens_seen": 69832428, + "step": 720 + }, + { + "epoch": 0.07039499413375049, + "loss": 0.1374109387397766, + "loss_ce": 0.0076959701254963875, + "loss_iou": 0.3046875, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 69832428, + "step": 720 + }, + { + "epoch": 0.07049276495893625, + "grad_norm": 11.36829805095331, + "learning_rate": 5e-05, + "loss": 0.1206, + "num_input_tokens_seen": 69929036, + "step": 721 + }, + { + "epoch": 0.07049276495893625, + "loss": 0.11607397347688675, + "loss_ce": 0.0026401346549391747, + "loss_iou": 0.396484375, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 69929036, + "step": 721 + }, + { + "epoch": 0.07059053578412201, + "grad_norm": 2.385134082617637, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 70025680, + "step": 722 + }, + { + "epoch": 0.07059053578412201, + "loss": 0.1250665783882141, + "loss_ce": 0.010564621537923813, + "loss_iou": 0.435546875, + "loss_num": 0.02294921875, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 70025680, + "step": 722 + }, + { + "epoch": 0.07068830660930778, + "grad_norm": 3.3870069604595723, + "learning_rate": 5e-05, + "loss": 0.1533, + "num_input_tokens_seen": 70123568, + "step": 723 + }, + { + "epoch": 0.07068830660930778, + "loss": 0.19476637244224548, + "loss_ce": 0.005526874214410782, + "loss_iou": 0.314453125, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 70123568, + "step": 723 + }, + { + "epoch": 0.07078607743449354, + "grad_norm": 3.487375524180352, + "learning_rate": 5e-05, + "loss": 0.1403, + "num_input_tokens_seen": 70220756, + "step": 724 + }, + { + "epoch": 0.07078607743449354, + "loss": 0.1657456010580063, + "loss_ce": 0.010716300457715988, + "loss_iou": 0.4453125, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 70220756, + "step": 724 + }, + { + "epoch": 0.0708838482596793, + "grad_norm": 10.952163070356605, + "learning_rate": 5e-05, + "loss": 0.1613, + "num_input_tokens_seen": 70317984, + "step": 725 + }, + { + "epoch": 0.0708838482596793, + "loss": 0.2230866253376007, + "loss_ce": 0.005069037899374962, + "loss_iou": 0.4765625, + "loss_num": 0.043701171875, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 70317984, + "step": 725 + }, + { + "epoch": 0.07098161908486508, + "grad_norm": 6.953487919436853, + "learning_rate": 5e-05, + "loss": 0.1085, + "num_input_tokens_seen": 70415384, + "step": 726 + }, + { + "epoch": 0.07098161908486508, + "loss": 0.08223316073417664, + "loss_ce": 0.0032536666840314865, + "loss_iou": 0.427734375, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 70415384, + "step": 726 + }, + { + "epoch": 0.07107938991005085, + "grad_norm": 8.95666727547641, + "learning_rate": 5e-05, + "loss": 0.1564, + "num_input_tokens_seen": 70513820, + "step": 727 + }, + { + "epoch": 0.07107938991005085, + "loss": 0.17855530977249146, + "loss_ce": 0.0019195717759430408, + "loss_iou": 0.5390625, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 70513820, + "step": 727 + }, + { + "epoch": 0.07117716073523661, + "grad_norm": 5.519580656767653, + "learning_rate": 5e-05, + "loss": 0.1313, + "num_input_tokens_seen": 70610184, + "step": 728 + }, + { + "epoch": 0.07117716073523661, + "loss": 0.14145928621292114, + "loss_ce": 0.0032146587036550045, + "loss_iou": 0.46484375, + "loss_num": 0.027587890625, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 70610184, + "step": 728 + }, + { + "epoch": 0.07127493156042237, + "grad_norm": 9.07004267561372, + "learning_rate": 5e-05, + "loss": 0.1343, + "num_input_tokens_seen": 70707452, + "step": 729 + }, + { + "epoch": 0.07127493156042237, + "loss": 0.09089235961437225, + "loss_ce": 0.008128689602017403, + "loss_iou": 0.44921875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 70707452, + "step": 729 + }, + { + "epoch": 0.07137270238560814, + "grad_norm": 7.786991232086792, + "learning_rate": 5e-05, + "loss": 0.1092, + "num_input_tokens_seen": 70804720, + "step": 730 + }, + { + "epoch": 0.07137270238560814, + "loss": 0.14481063187122345, + "loss_ce": 0.005681000184267759, + "loss_iou": 0.39453125, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 70804720, + "step": 730 + }, + { + "epoch": 0.0714704732107939, + "grad_norm": 10.72718842184827, + "learning_rate": 5e-05, + "loss": 0.1305, + "num_input_tokens_seen": 70902492, + "step": 731 + }, + { + "epoch": 0.0714704732107939, + "loss": 0.11160603165626526, + "loss_ce": 0.0047334711998701096, + "loss_iou": 0.37890625, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 70902492, + "step": 731 + }, + { + "epoch": 0.07156824403597967, + "grad_norm": 5.0815321712391865, + "learning_rate": 5e-05, + "loss": 0.1107, + "num_input_tokens_seen": 70998860, + "step": 732 + }, + { + "epoch": 0.07156824403597967, + "loss": 0.14280542731285095, + "loss_ce": 0.005781510844826698, + "loss_iou": 0.302734375, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 70998860, + "step": 732 + }, + { + "epoch": 0.07166601486116543, + "grad_norm": 4.668780385658542, + "learning_rate": 5e-05, + "loss": 0.1154, + "num_input_tokens_seen": 71097512, + "step": 733 + }, + { + "epoch": 0.07166601486116543, + "loss": 0.08904200047254562, + "loss_ce": 0.00433283532038331, + "loss_iou": 0.41015625, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 71097512, + "step": 733 + }, + { + "epoch": 0.0717637856863512, + "grad_norm": 3.7972660131250384, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 71193864, + "step": 734 + }, + { + "epoch": 0.0717637856863512, + "loss": 0.09338803589344025, + "loss_ce": 0.0052608950063586235, + "loss_iou": 0.23828125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 71193864, + "step": 734 + }, + { + "epoch": 0.07186155651153696, + "grad_norm": 15.171040808466026, + "learning_rate": 5e-05, + "loss": 0.1294, + "num_input_tokens_seen": 71291656, + "step": 735 + }, + { + "epoch": 0.07186155651153696, + "loss": 0.10870419442653656, + "loss_ce": 0.008026708848774433, + "loss_iou": 0.51171875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 71291656, + "step": 735 + }, + { + "epoch": 0.07195932733672272, + "grad_norm": 8.297814747543033, + "learning_rate": 5e-05, + "loss": 0.1932, + "num_input_tokens_seen": 71388524, + "step": 736 + }, + { + "epoch": 0.07195932733672272, + "loss": 0.2123277485370636, + "loss_ce": 0.009324826300144196, + "loss_iou": 0.310546875, + "loss_num": 0.04052734375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 71388524, + "step": 736 + }, + { + "epoch": 0.07205709816190849, + "grad_norm": 2.635969466425834, + "learning_rate": 5e-05, + "loss": 0.1232, + "num_input_tokens_seen": 71485420, + "step": 737 + }, + { + "epoch": 0.07205709816190849, + "loss": 0.14767438173294067, + "loss_ce": 0.007720772176980972, + "loss_iou": 0.333984375, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 71485420, + "step": 737 + }, + { + "epoch": 0.07215486898709425, + "grad_norm": 3.281088051717643, + "learning_rate": 5e-05, + "loss": 0.1115, + "num_input_tokens_seen": 71581744, + "step": 738 + }, + { + "epoch": 0.07215486898709425, + "loss": 0.11694703996181488, + "loss_ce": 0.003650533501058817, + "loss_iou": 0.451171875, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 71581744, + "step": 738 + }, + { + "epoch": 0.07225263981228001, + "grad_norm": 4.3818941462212155, + "learning_rate": 5e-05, + "loss": 0.1338, + "num_input_tokens_seen": 71679104, + "step": 739 + }, + { + "epoch": 0.07225263981228001, + "loss": 0.10322003066539764, + "loss_ce": 0.009622624143958092, + "loss_iou": 0.314453125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 71679104, + "step": 739 + }, + { + "epoch": 0.07235041063746578, + "grad_norm": 10.033000104617944, + "learning_rate": 5e-05, + "loss": 0.1231, + "num_input_tokens_seen": 71776004, + "step": 740 + }, + { + "epoch": 0.07235041063746578, + "loss": 0.13854217529296875, + "loss_ce": 0.009635921567678452, + "loss_iou": 0.44921875, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 71776004, + "step": 740 + }, + { + "epoch": 0.07244818146265154, + "grad_norm": 20.95162346262031, + "learning_rate": 5e-05, + "loss": 0.151, + "num_input_tokens_seen": 71872740, + "step": 741 + }, + { + "epoch": 0.07244818146265154, + "loss": 0.14273828268051147, + "loss_ce": 0.008583005517721176, + "loss_iou": 0.318359375, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 71872740, + "step": 741 + }, + { + "epoch": 0.0725459522878373, + "grad_norm": 3.045187545737235, + "learning_rate": 5e-05, + "loss": 0.113, + "num_input_tokens_seen": 71969420, + "step": 742 + }, + { + "epoch": 0.0725459522878373, + "loss": 0.13964001834392548, + "loss_ce": 0.004096187651157379, + "loss_iou": 0.318359375, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 71969420, + "step": 742 + }, + { + "epoch": 0.07264372311302307, + "grad_norm": 7.532766764120231, + "learning_rate": 5e-05, + "loss": 0.1229, + "num_input_tokens_seen": 72067128, + "step": 743 + }, + { + "epoch": 0.07264372311302307, + "loss": 0.12822867929935455, + "loss_ce": 0.011254807002842426, + "loss_iou": 0.58203125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 72067128, + "step": 743 + }, + { + "epoch": 0.07274149393820883, + "grad_norm": 4.45385416255065, + "learning_rate": 5e-05, + "loss": 0.132, + "num_input_tokens_seen": 72164668, + "step": 744 + }, + { + "epoch": 0.07274149393820883, + "loss": 0.13873115181922913, + "loss_ce": 0.005125200375914574, + "loss_iou": 0.310546875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 72164668, + "step": 744 + }, + { + "epoch": 0.0728392647633946, + "grad_norm": 5.4559997003772835, + "learning_rate": 5e-05, + "loss": 0.1407, + "num_input_tokens_seen": 72261360, + "step": 745 + }, + { + "epoch": 0.0728392647633946, + "loss": 0.09379930794239044, + "loss_ce": 0.008746808394789696, + "loss_iou": 0.451171875, + "loss_num": 0.01708984375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 72261360, + "step": 745 + }, + { + "epoch": 0.07293703558858036, + "grad_norm": 3.89377601670669, + "learning_rate": 5e-05, + "loss": 0.1016, + "num_input_tokens_seen": 72358044, + "step": 746 + }, + { + "epoch": 0.07293703558858036, + "loss": 0.09357139468193054, + "loss_ce": 0.006199575029313564, + "loss_iou": 0.34375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 72358044, + "step": 746 + }, + { + "epoch": 0.07303480641376613, + "grad_norm": 14.906914746002919, + "learning_rate": 5e-05, + "loss": 0.1283, + "num_input_tokens_seen": 72454732, + "step": 747 + }, + { + "epoch": 0.07303480641376613, + "loss": 0.1173291727900505, + "loss_ce": 0.003559640608727932, + "loss_iou": 0.44921875, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 72454732, + "step": 747 + }, + { + "epoch": 0.07313257723895189, + "grad_norm": 9.301982884037361, + "learning_rate": 5e-05, + "loss": 0.146, + "num_input_tokens_seen": 72552352, + "step": 748 + }, + { + "epoch": 0.07313257723895189, + "loss": 0.11970211565494537, + "loss_ce": 0.007092249114066362, + "loss_iou": 0.419921875, + "loss_num": 0.0224609375, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 72552352, + "step": 748 + }, + { + "epoch": 0.07323034806413767, + "grad_norm": 19.385341837317906, + "learning_rate": 5e-05, + "loss": 0.1145, + "num_input_tokens_seen": 72649468, + "step": 749 + }, + { + "epoch": 0.07323034806413767, + "loss": 0.10601072013378143, + "loss_ce": 0.004173566587269306, + "loss_iou": 0.423828125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 72649468, + "step": 749 + }, + { + "epoch": 0.07332811888932343, + "grad_norm": 8.070967736230015, + "learning_rate": 5e-05, + "loss": 0.1403, + "num_input_tokens_seen": 72746640, + "step": 750 + }, + { + "epoch": 0.07332811888932343, + "eval_seeclick_CIoU": 0.34309862554073334, + "eval_seeclick_GIoU": 0.3358134329319, + "eval_seeclick_IoU": 0.41419483721256256, + "eval_seeclick_MAE_all": 0.10432751104235649, + "eval_seeclick_MAE_h": 0.04960937798023224, + "eval_seeclick_MAE_w": 0.1540832445025444, + "eval_seeclick_MAE_x": 0.16728176176548004, + "eval_seeclick_MAE_y": 0.04633566550910473, + "eval_seeclick_NUM_probability": 0.9945521056652069, + "eval_seeclick_inside_bbox": 0.6463068127632141, + "eval_seeclick_loss": 0.366226464509964, + "eval_seeclick_loss_ce": 0.01377165550366044, + "eval_seeclick_loss_iou": 0.5355224609375, + "eval_seeclick_loss_num": 0.06805419921875, + "eval_seeclick_loss_xval": 0.34014892578125, + "eval_seeclick_runtime": 73.5981, + "eval_seeclick_samples_per_second": 0.584, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 72746640, + "step": 750 + }, + { + "epoch": 0.07332811888932343, + "eval_icons_CIoU": 0.4807487726211548, + "eval_icons_GIoU": 0.4650620371103287, + "eval_icons_IoU": 0.5343823432922363, + "eval_icons_MAE_all": 0.1034701056778431, + "eval_icons_MAE_h": 0.12026948109269142, + "eval_icons_MAE_w": 0.08937402069568634, + "eval_icons_MAE_x": 0.08315024524927139, + "eval_icons_MAE_y": 0.12108667567372322, + "eval_icons_NUM_probability": 0.9986411929130554, + "eval_icons_inside_bbox": 0.7395833432674408, + "eval_icons_loss": 0.3170141577720642, + "eval_icons_loss_ce": 0.0005065755103714764, + "eval_icons_loss_iou": 0.3768310546875, + "eval_icons_loss_num": 0.06404876708984375, + "eval_icons_loss_xval": 0.320281982421875, + "eval_icons_runtime": 84.0781, + "eval_icons_samples_per_second": 0.595, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 72746640, + "step": 750 + }, + { + "epoch": 0.07332811888932343, + "eval_screenspot_CIoU": 0.33644209305445355, + "eval_screenspot_GIoU": 0.32904431223869324, + "eval_screenspot_IoU": 0.42414647340774536, + "eval_screenspot_MAE_all": 0.14972146848837534, + "eval_screenspot_MAE_h": 0.10917383432388306, + "eval_screenspot_MAE_w": 0.21484426905711493, + "eval_screenspot_MAE_x": 0.17348912358283997, + "eval_screenspot_MAE_y": 0.10137867430845897, + "eval_screenspot_NUM_probability": 0.9995939532915751, + "eval_screenspot_inside_bbox": 0.6745833357175192, + "eval_screenspot_loss": 0.533226728439331, + "eval_screenspot_loss_ce": 0.01568922804047664, + "eval_screenspot_loss_iou": 0.3954264322916667, + "eval_screenspot_loss_num": 0.10564168294270833, + "eval_screenspot_loss_xval": 0.52813720703125, + "eval_screenspot_runtime": 147.6001, + "eval_screenspot_samples_per_second": 0.603, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 72746640, + "step": 750 + }, + { + "epoch": 0.07332811888932343, + "eval_compot_CIoU": 0.47209927439689636, + "eval_compot_GIoU": 0.45286327600479126, + "eval_compot_IoU": 0.5265496969223022, + "eval_compot_MAE_all": 0.08976174145936966, + "eval_compot_MAE_h": 0.08136987313628197, + "eval_compot_MAE_w": 0.11504679545760155, + "eval_compot_MAE_x": 0.09894436225295067, + "eval_compot_MAE_y": 0.06368594989180565, + "eval_compot_NUM_probability": 0.9992908537387848, + "eval_compot_inside_bbox": 0.7673611044883728, + "eval_compot_loss": 0.39008593559265137, + "eval_compot_loss_ce": 0.08293026685714722, + "eval_compot_loss_iou": 0.5750732421875, + "eval_compot_loss_num": 0.058837890625, + "eval_compot_loss_xval": 0.2940673828125, + "eval_compot_runtime": 83.3397, + "eval_compot_samples_per_second": 0.6, + "eval_compot_steps_per_second": 0.024, + "num_input_tokens_seen": 72746640, + "step": 750 + }, + { + "epoch": 0.07332811888932343, + "loss": 0.39230334758758545, + "loss_ce": 0.08627305924892426, + "loss_iou": 0.5546875, + "loss_num": 0.061279296875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 72746640, + "step": 750 + }, + { + "epoch": 0.0734258897145092, + "grad_norm": 4.9034879550542225, + "learning_rate": 5e-05, + "loss": 0.1275, + "num_input_tokens_seen": 72843832, + "step": 751 + }, + { + "epoch": 0.0734258897145092, + "loss": 0.15486571192741394, + "loss_ce": 0.007923576980829239, + "loss_iou": 0.408203125, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 72843832, + "step": 751 + }, + { + "epoch": 0.07352366053969496, + "grad_norm": 16.068419618598078, + "learning_rate": 5e-05, + "loss": 0.1315, + "num_input_tokens_seen": 72942172, + "step": 752 + }, + { + "epoch": 0.07352366053969496, + "loss": 0.15393730998039246, + "loss_ce": 0.009345022030174732, + "loss_iou": 0.34375, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 72942172, + "step": 752 + }, + { + "epoch": 0.07362143136488072, + "grad_norm": 4.986285174974167, + "learning_rate": 5e-05, + "loss": 0.1059, + "num_input_tokens_seen": 73039116, + "step": 753 + }, + { + "epoch": 0.07362143136488072, + "loss": 0.08497914671897888, + "loss_ce": 0.011691181920468807, + "loss_iou": 0.37109375, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 73039116, + "step": 753 + }, + { + "epoch": 0.07371920219006649, + "grad_norm": 3.4536035503144786, + "learning_rate": 5e-05, + "loss": 0.1107, + "num_input_tokens_seen": 73135492, + "step": 754 + }, + { + "epoch": 0.07371920219006649, + "loss": 0.12656405568122864, + "loss_ce": 0.00899507850408554, + "loss_iou": 0.2392578125, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 73135492, + "step": 754 + }, + { + "epoch": 0.07381697301525225, + "grad_norm": 3.748618767440868, + "learning_rate": 5e-05, + "loss": 0.1465, + "num_input_tokens_seen": 73232828, + "step": 755 + }, + { + "epoch": 0.07381697301525225, + "loss": 0.18269726634025574, + "loss_ce": 0.008441897109150887, + "loss_iou": 0.41796875, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 73232828, + "step": 755 + }, + { + "epoch": 0.07391474384043802, + "grad_norm": 7.900325564346994, + "learning_rate": 5e-05, + "loss": 0.0952, + "num_input_tokens_seen": 73329500, + "step": 756 + }, + { + "epoch": 0.07391474384043802, + "loss": 0.0856657549738884, + "loss_ce": 0.009799059480428696, + "loss_iou": 0.333984375, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 73329500, + "step": 756 + }, + { + "epoch": 0.07401251466562378, + "grad_norm": 3.994721448949123, + "learning_rate": 5e-05, + "loss": 0.1189, + "num_input_tokens_seen": 73426524, + "step": 757 + }, + { + "epoch": 0.07401251466562378, + "loss": 0.09219870716333389, + "loss_ce": 0.005910250823944807, + "loss_iou": 0.3515625, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 73426524, + "step": 757 + }, + { + "epoch": 0.07411028549080954, + "grad_norm": 3.7999125391276376, + "learning_rate": 5e-05, + "loss": 0.1122, + "num_input_tokens_seen": 73523564, + "step": 758 + }, + { + "epoch": 0.07411028549080954, + "loss": 0.10355639457702637, + "loss_ce": 0.005182977765798569, + "loss_iou": 0.41015625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 73523564, + "step": 758 + }, + { + "epoch": 0.07420805631599531, + "grad_norm": 5.27226069147265, + "learning_rate": 5e-05, + "loss": 0.1225, + "num_input_tokens_seen": 73621016, + "step": 759 + }, + { + "epoch": 0.07420805631599531, + "loss": 0.14159877598285675, + "loss_ce": 0.009305081330239773, + "loss_iou": 0.40234375, + "loss_num": 0.0264892578125, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 73621016, + "step": 759 + }, + { + "epoch": 0.07430582714118107, + "grad_norm": 10.370474328305871, + "learning_rate": 5e-05, + "loss": 0.1545, + "num_input_tokens_seen": 73718452, + "step": 760 + }, + { + "epoch": 0.07430582714118107, + "loss": 0.15321576595306396, + "loss_ce": 0.00926436111330986, + "loss_iou": 0.474609375, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 73718452, + "step": 760 + }, + { + "epoch": 0.07440359796636684, + "grad_norm": 5.690276366492948, + "learning_rate": 5e-05, + "loss": 0.1252, + "num_input_tokens_seen": 73815460, + "step": 761 + }, + { + "epoch": 0.07440359796636684, + "loss": 0.12365780770778656, + "loss_ce": 0.006592385936528444, + "loss_iou": 0.361328125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 73815460, + "step": 761 + }, + { + "epoch": 0.0745013687915526, + "grad_norm": 16.916088436456523, + "learning_rate": 5e-05, + "loss": 0.157, + "num_input_tokens_seen": 73912084, + "step": 762 + }, + { + "epoch": 0.0745013687915526, + "loss": 0.178024560213089, + "loss_ce": 0.002426413120701909, + "loss_iou": 0.4609375, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 73912084, + "step": 762 + }, + { + "epoch": 0.07459913961673836, + "grad_norm": 9.996992480485451, + "learning_rate": 5e-05, + "loss": 0.1212, + "num_input_tokens_seen": 74009260, + "step": 763 + }, + { + "epoch": 0.07459913961673836, + "loss": 0.13064032793045044, + "loss_ce": 0.00704413466155529, + "loss_iou": 0.5234375, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 74009260, + "step": 763 + }, + { + "epoch": 0.07469691044192413, + "grad_norm": 16.32320261371941, + "learning_rate": 5e-05, + "loss": 0.117, + "num_input_tokens_seen": 74106716, + "step": 764 + }, + { + "epoch": 0.07469691044192413, + "loss": 0.12223616242408752, + "loss_ce": 0.005781086627393961, + "loss_iou": 0.439453125, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 74106716, + "step": 764 + }, + { + "epoch": 0.07479468126710989, + "grad_norm": 4.4562773704324545, + "learning_rate": 5e-05, + "loss": 0.1222, + "num_input_tokens_seen": 74203348, + "step": 765 + }, + { + "epoch": 0.07479468126710989, + "loss": 0.11173857748508453, + "loss_ce": 0.0016387822106480598, + "loss_iou": 0.48828125, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 74203348, + "step": 765 + }, + { + "epoch": 0.07489245209229566, + "grad_norm": 3.665519018404813, + "learning_rate": 5e-05, + "loss": 0.154, + "num_input_tokens_seen": 74300692, + "step": 766 + }, + { + "epoch": 0.07489245209229566, + "loss": 0.2091664969921112, + "loss_ce": 0.008360836654901505, + "loss_iou": 0.291015625, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 74300692, + "step": 766 + }, + { + "epoch": 0.07499022291748142, + "grad_norm": 6.165024726626178, + "learning_rate": 5e-05, + "loss": 0.1673, + "num_input_tokens_seen": 74397924, + "step": 767 + }, + { + "epoch": 0.07499022291748142, + "loss": 0.1660449504852295, + "loss_ce": 0.0029590032063424587, + "loss_iou": 0.4453125, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 74397924, + "step": 767 + }, + { + "epoch": 0.07508799374266718, + "grad_norm": 4.134065292914015, + "learning_rate": 5e-05, + "loss": 0.1117, + "num_input_tokens_seen": 74494928, + "step": 768 + }, + { + "epoch": 0.07508799374266718, + "loss": 0.1292107105255127, + "loss_ce": 0.006957286037504673, + "loss_iou": 0.3828125, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 74494928, + "step": 768 + }, + { + "epoch": 0.07518576456785295, + "grad_norm": 8.75600411324448, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 74592324, + "step": 769 + }, + { + "epoch": 0.07518576456785295, + "loss": 0.07580610364675522, + "loss_ce": 0.00241132453083992, + "loss_iou": 0.353515625, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 74592324, + "step": 769 + }, + { + "epoch": 0.07528353539303871, + "grad_norm": 12.59655517009052, + "learning_rate": 5e-05, + "loss": 0.1137, + "num_input_tokens_seen": 74689292, + "step": 770 + }, + { + "epoch": 0.07528353539303871, + "loss": 0.11351001262664795, + "loss_ce": 0.00889575108885765, + "loss_iou": 0.40625, + "loss_num": 0.02099609375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 74689292, + "step": 770 + }, + { + "epoch": 0.07538130621822448, + "grad_norm": 3.814309121466588, + "learning_rate": 5e-05, + "loss": 0.1422, + "num_input_tokens_seen": 74786316, + "step": 771 + }, + { + "epoch": 0.07538130621822448, + "loss": 0.12741947174072266, + "loss_ce": 0.009102826938033104, + "loss_iou": 0.400390625, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 74786316, + "step": 771 + }, + { + "epoch": 0.07547907704341025, + "grad_norm": 15.173495543766672, + "learning_rate": 5e-05, + "loss": 0.1076, + "num_input_tokens_seen": 74883856, + "step": 772 + }, + { + "epoch": 0.07547907704341025, + "loss": 0.13493360579013824, + "loss_ce": 0.004257332533597946, + "loss_iou": 0.515625, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 74883856, + "step": 772 + }, + { + "epoch": 0.07557684786859602, + "grad_norm": 4.854606258541567, + "learning_rate": 5e-05, + "loss": 0.1132, + "num_input_tokens_seen": 74979516, + "step": 773 + }, + { + "epoch": 0.07557684786859602, + "loss": 0.09179700165987015, + "loss_ce": 0.006958132144063711, + "loss_iou": 0.291015625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 74979516, + "step": 773 + }, + { + "epoch": 0.07567461869378178, + "grad_norm": 9.121256728847767, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 75077096, + "step": 774 + }, + { + "epoch": 0.07567461869378178, + "loss": 0.08155407011508942, + "loss_ce": 0.004436152055859566, + "loss_iou": 0.3984375, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 75077096, + "step": 774 + }, + { + "epoch": 0.07577238951896755, + "grad_norm": 5.781098245576453, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 75174164, + "step": 775 + }, + { + "epoch": 0.07577238951896755, + "loss": 0.08408280462026596, + "loss_ce": 0.007353982422500849, + "loss_iou": 0.5, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 75174164, + "step": 775 + }, + { + "epoch": 0.07587016034415331, + "grad_norm": 11.24504095473693, + "learning_rate": 5e-05, + "loss": 0.1258, + "num_input_tokens_seen": 75270896, + "step": 776 + }, + { + "epoch": 0.07587016034415331, + "loss": 0.10708356648683548, + "loss_ce": 0.0037510469555854797, + "loss_iou": 0.455078125, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 75270896, + "step": 776 + }, + { + "epoch": 0.07596793116933907, + "grad_norm": 5.350324981478623, + "learning_rate": 5e-05, + "loss": 0.1519, + "num_input_tokens_seen": 75367980, + "step": 777 + }, + { + "epoch": 0.07596793116933907, + "loss": 0.13433390855789185, + "loss_ce": 0.003566090017557144, + "loss_iou": 0.31640625, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 75367980, + "step": 777 + }, + { + "epoch": 0.07606570199452484, + "grad_norm": 4.195374548173783, + "learning_rate": 5e-05, + "loss": 0.1068, + "num_input_tokens_seen": 75465488, + "step": 778 + }, + { + "epoch": 0.07606570199452484, + "loss": 0.11028720438480377, + "loss_ce": 0.005672938656061888, + "loss_iou": 0.49609375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 75465488, + "step": 778 + }, + { + "epoch": 0.0761634728197106, + "grad_norm": 9.03920167718278, + "learning_rate": 5e-05, + "loss": 0.1632, + "num_input_tokens_seen": 75562448, + "step": 779 + }, + { + "epoch": 0.0761634728197106, + "loss": 0.15631595253944397, + "loss_ce": 0.010350372642278671, + "loss_iou": 0.447265625, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 75562448, + "step": 779 + }, + { + "epoch": 0.07626124364489636, + "grad_norm": 12.558779378714663, + "learning_rate": 5e-05, + "loss": 0.148, + "num_input_tokens_seen": 75660008, + "step": 780 + }, + { + "epoch": 0.07626124364489636, + "loss": 0.17785882949829102, + "loss_ce": 0.008181083016097546, + "loss_iou": 0.34375, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 75660008, + "step": 780 + }, + { + "epoch": 0.07635901447008213, + "grad_norm": 5.773365181062827, + "learning_rate": 5e-05, + "loss": 0.1311, + "num_input_tokens_seen": 75756980, + "step": 781 + }, + { + "epoch": 0.07635901447008213, + "loss": 0.10987737774848938, + "loss_ce": 0.006537231616675854, + "loss_iou": 0.28125, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 75756980, + "step": 781 + }, + { + "epoch": 0.07645678529526789, + "grad_norm": 4.178997381660744, + "learning_rate": 5e-05, + "loss": 0.1024, + "num_input_tokens_seen": 75854488, + "step": 782 + }, + { + "epoch": 0.07645678529526789, + "loss": 0.07846271991729736, + "loss_ce": 0.006204717792570591, + "loss_iou": 0.37890625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 75854488, + "step": 782 + }, + { + "epoch": 0.07655455612045366, + "grad_norm": 7.170195196914523, + "learning_rate": 5e-05, + "loss": 0.0984, + "num_input_tokens_seen": 75951340, + "step": 783 + }, + { + "epoch": 0.07655455612045366, + "loss": 0.10804836452007294, + "loss_ce": 0.007553978357464075, + "loss_iou": 0.2158203125, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 75951340, + "step": 783 + }, + { + "epoch": 0.07665232694563942, + "grad_norm": 12.045689554458864, + "learning_rate": 5e-05, + "loss": 0.1391, + "num_input_tokens_seen": 76047608, + "step": 784 + }, + { + "epoch": 0.07665232694563942, + "loss": 0.1722664088010788, + "loss_ce": 0.005884576588869095, + "loss_iou": 0.3046875, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 76047608, + "step": 784 + }, + { + "epoch": 0.07675009777082518, + "grad_norm": 14.356522426474653, + "learning_rate": 5e-05, + "loss": 0.0934, + "num_input_tokens_seen": 76144436, + "step": 785 + }, + { + "epoch": 0.07675009777082518, + "loss": 0.09753550589084625, + "loss_ce": 0.0079511608928442, + "loss_iou": 0.494140625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 76144436, + "step": 785 + }, + { + "epoch": 0.07684786859601095, + "grad_norm": 18.476743085316155, + "learning_rate": 5e-05, + "loss": 0.1571, + "num_input_tokens_seen": 76240892, + "step": 786 + }, + { + "epoch": 0.07684786859601095, + "loss": 0.16250097751617432, + "loss_ce": 0.008509278297424316, + "loss_iou": 0.390625, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 76240892, + "step": 786 + }, + { + "epoch": 0.07694563942119671, + "grad_norm": 5.517693057499438, + "learning_rate": 5e-05, + "loss": 0.1084, + "num_input_tokens_seen": 76337612, + "step": 787 + }, + { + "epoch": 0.07694563942119671, + "loss": 0.10499639809131622, + "loss_ce": 0.0092932702973485, + "loss_iou": 0.419921875, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 76337612, + "step": 787 + }, + { + "epoch": 0.07704341024638248, + "grad_norm": 5.018717336189992, + "learning_rate": 5e-05, + "loss": 0.1379, + "num_input_tokens_seen": 76435260, + "step": 788 + }, + { + "epoch": 0.07704341024638248, + "loss": 0.1208113357424736, + "loss_ce": 0.006690847687423229, + "loss_iou": 0.310546875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 76435260, + "step": 788 + }, + { + "epoch": 0.07714118107156824, + "grad_norm": 4.51875354485518, + "learning_rate": 5e-05, + "loss": 0.1317, + "num_input_tokens_seen": 76532784, + "step": 789 + }, + { + "epoch": 0.07714118107156824, + "loss": 0.11038508266210556, + "loss_ce": 0.0019675763323903084, + "loss_iou": 0.3359375, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 76532784, + "step": 789 + }, + { + "epoch": 0.077238951896754, + "grad_norm": 3.54313417381988, + "learning_rate": 5e-05, + "loss": 0.104, + "num_input_tokens_seen": 76629744, + "step": 790 + }, + { + "epoch": 0.077238951896754, + "loss": 0.10168155282735825, + "loss_ce": 0.002713048830628395, + "loss_iou": 0.423828125, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 76629744, + "step": 790 + }, + { + "epoch": 0.07733672272193977, + "grad_norm": 21.72798873937619, + "learning_rate": 5e-05, + "loss": 0.153, + "num_input_tokens_seen": 76726968, + "step": 791 + }, + { + "epoch": 0.07733672272193977, + "loss": 0.10163503885269165, + "loss_ce": 0.0050163897685706615, + "loss_iou": 0.5078125, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 76726968, + "step": 791 + }, + { + "epoch": 0.07743449354712553, + "grad_norm": 8.168552490959264, + "learning_rate": 5e-05, + "loss": 0.1629, + "num_input_tokens_seen": 76824224, + "step": 792 + }, + { + "epoch": 0.07743449354712553, + "loss": 0.19935950636863708, + "loss_ce": 0.00831945613026619, + "loss_iou": 0.447265625, + "loss_num": 0.0380859375, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 76824224, + "step": 792 + }, + { + "epoch": 0.0775322643723113, + "grad_norm": 10.87840459546269, + "learning_rate": 5e-05, + "loss": 0.1204, + "num_input_tokens_seen": 76921056, + "step": 793 + }, + { + "epoch": 0.0775322643723113, + "loss": 0.13826915621757507, + "loss_ce": 0.011148189194500446, + "loss_iou": 0.3515625, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 76921056, + "step": 793 + }, + { + "epoch": 0.07763003519749706, + "grad_norm": 12.435446685592842, + "learning_rate": 5e-05, + "loss": 0.1304, + "num_input_tokens_seen": 77018808, + "step": 794 + }, + { + "epoch": 0.07763003519749706, + "loss": 0.14810031652450562, + "loss_ce": 0.008360326290130615, + "loss_iou": 0.392578125, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 77018808, + "step": 794 + }, + { + "epoch": 0.07772780602268284, + "grad_norm": 6.77265750445358, + "learning_rate": 5e-05, + "loss": 0.1519, + "num_input_tokens_seen": 77115912, + "step": 795 + }, + { + "epoch": 0.07772780602268284, + "loss": 0.1549772322177887, + "loss_ce": 0.0038541750982403755, + "loss_iou": 0.345703125, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 77115912, + "step": 795 + }, + { + "epoch": 0.0778255768478686, + "grad_norm": 16.357850612829107, + "learning_rate": 5e-05, + "loss": 0.1329, + "num_input_tokens_seen": 77214120, + "step": 796 + }, + { + "epoch": 0.0778255768478686, + "loss": 0.1547287255525589, + "loss_ce": 0.0063217394053936005, + "loss_iou": 0.462890625, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 77214120, + "step": 796 + }, + { + "epoch": 0.07792334767305437, + "grad_norm": 3.8893526623647423, + "learning_rate": 5e-05, + "loss": 0.0893, + "num_input_tokens_seen": 77310604, + "step": 797 + }, + { + "epoch": 0.07792334767305437, + "loss": 0.06785094738006592, + "loss_ce": 0.0028485071379691362, + "loss_iou": 0.27734375, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 77310604, + "step": 797 + }, + { + "epoch": 0.07802111849824013, + "grad_norm": 7.231462316617548, + "learning_rate": 5e-05, + "loss": 0.1465, + "num_input_tokens_seen": 77406960, + "step": 798 + }, + { + "epoch": 0.07802111849824013, + "loss": 0.1642603874206543, + "loss_ce": 0.003432752098888159, + "loss_iou": 0.375, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 77406960, + "step": 798 + }, + { + "epoch": 0.0781188893234259, + "grad_norm": 13.374243320092356, + "learning_rate": 5e-05, + "loss": 0.1192, + "num_input_tokens_seen": 77503912, + "step": 799 + }, + { + "epoch": 0.0781188893234259, + "loss": 0.13003414869308472, + "loss_ce": 0.008543668314814568, + "loss_iou": 0.439453125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 77503912, + "step": 799 + }, + { + "epoch": 0.07821666014861166, + "grad_norm": 4.331986789251518, + "learning_rate": 5e-05, + "loss": 0.112, + "num_input_tokens_seen": 77601076, + "step": 800 + }, + { + "epoch": 0.07821666014861166, + "loss": 0.07927753776311874, + "loss_ce": 0.004097484052181244, + "loss_iou": 0.439453125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 77601076, + "step": 800 + }, + { + "epoch": 0.07831443097379742, + "grad_norm": 10.332691861445277, + "learning_rate": 5e-05, + "loss": 0.124, + "num_input_tokens_seen": 77698296, + "step": 801 + }, + { + "epoch": 0.07831443097379742, + "loss": 0.1432093232870102, + "loss_ce": 0.008169040083885193, + "loss_iou": 0.51171875, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 77698296, + "step": 801 + }, + { + "epoch": 0.07841220179898319, + "grad_norm": 10.80673091851748, + "learning_rate": 5e-05, + "loss": 0.1176, + "num_input_tokens_seen": 77794804, + "step": 802 + }, + { + "epoch": 0.07841220179898319, + "loss": 0.08044849336147308, + "loss_ce": 0.005192149430513382, + "loss_iou": 0.38671875, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 77794804, + "step": 802 + }, + { + "epoch": 0.07850997262416895, + "grad_norm": 7.956064632946198, + "learning_rate": 5e-05, + "loss": 0.1223, + "num_input_tokens_seen": 77891512, + "step": 803 + }, + { + "epoch": 0.07850997262416895, + "loss": 0.138464093208313, + "loss_ce": 0.00858127512037754, + "loss_iou": 0.44140625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 77891512, + "step": 803 + }, + { + "epoch": 0.07860774344935471, + "grad_norm": 7.067823467341351, + "learning_rate": 5e-05, + "loss": 0.1495, + "num_input_tokens_seen": 77987684, + "step": 804 + }, + { + "epoch": 0.07860774344935471, + "loss": 0.1298256367444992, + "loss_ce": 0.010135700926184654, + "loss_iou": 0.30859375, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 77987684, + "step": 804 + }, + { + "epoch": 0.07870551427454048, + "grad_norm": 8.953233272416286, + "learning_rate": 5e-05, + "loss": 0.1427, + "num_input_tokens_seen": 78084376, + "step": 805 + }, + { + "epoch": 0.07870551427454048, + "loss": 0.1574966013431549, + "loss_ce": 0.0056411344558000565, + "loss_iou": 0.326171875, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 78084376, + "step": 805 + }, + { + "epoch": 0.07880328509972624, + "grad_norm": 4.096642367619777, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 78182360, + "step": 806 + }, + { + "epoch": 0.07880328509972624, + "loss": 0.08623547852039337, + "loss_ce": 0.006828739307820797, + "loss_iou": 0.34765625, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 78182360, + "step": 806 + }, + { + "epoch": 0.078901055924912, + "grad_norm": 17.912585609647465, + "learning_rate": 5e-05, + "loss": 0.1502, + "num_input_tokens_seen": 78279024, + "step": 807 + }, + { + "epoch": 0.078901055924912, + "loss": 0.1652398705482483, + "loss_ce": 0.015337534248828888, + "loss_iou": 0.2119140625, + "loss_num": 0.0299072265625, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 78279024, + "step": 807 + }, + { + "epoch": 0.07899882675009777, + "grad_norm": 6.261045208361951, + "learning_rate": 5e-05, + "loss": 0.1597, + "num_input_tokens_seen": 78375816, + "step": 808 + }, + { + "epoch": 0.07899882675009777, + "loss": 0.17731282114982605, + "loss_ce": 0.005071608815342188, + "loss_iou": 0.365234375, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 78375816, + "step": 808 + }, + { + "epoch": 0.07909659757528353, + "grad_norm": 7.263212795932601, + "learning_rate": 5e-05, + "loss": 0.1919, + "num_input_tokens_seen": 78472092, + "step": 809 + }, + { + "epoch": 0.07909659757528353, + "loss": 0.17037665843963623, + "loss_ce": 0.004727252759039402, + "loss_iou": 0.51171875, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 78472092, + "step": 809 + }, + { + "epoch": 0.0791943684004693, + "grad_norm": 26.684297545958263, + "learning_rate": 5e-05, + "loss": 0.184, + "num_input_tokens_seen": 78569904, + "step": 810 + }, + { + "epoch": 0.0791943684004693, + "loss": 0.2225407063961029, + "loss_ce": 0.027472350746393204, + "loss_iou": 0.455078125, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 78569904, + "step": 810 + }, + { + "epoch": 0.07929213922565506, + "grad_norm": 7.914211542749957, + "learning_rate": 5e-05, + "loss": 0.211, + "num_input_tokens_seen": 78666948, + "step": 811 + }, + { + "epoch": 0.07929213922565506, + "loss": 0.20527002215385437, + "loss_ce": 0.02619287371635437, + "loss_iou": 0.44140625, + "loss_num": 0.035888671875, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 78666948, + "step": 811 + }, + { + "epoch": 0.07938991005084083, + "grad_norm": 3.680062671923916, + "learning_rate": 5e-05, + "loss": 0.1774, + "num_input_tokens_seen": 78764488, + "step": 812 + }, + { + "epoch": 0.07938991005084083, + "loss": 0.18346437811851501, + "loss_ce": 0.01171143725514412, + "loss_iou": 0.4296875, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 78764488, + "step": 812 + }, + { + "epoch": 0.07948768087602659, + "grad_norm": 5.914044431634894, + "learning_rate": 5e-05, + "loss": 0.1762, + "num_input_tokens_seen": 78861664, + "step": 813 + }, + { + "epoch": 0.07948768087602659, + "loss": 0.18096975982189178, + "loss_ce": 0.007019560318440199, + "loss_iou": 0.310546875, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 78861664, + "step": 813 + }, + { + "epoch": 0.07958545170121235, + "grad_norm": 4.851810938796123, + "learning_rate": 5e-05, + "loss": 0.1536, + "num_input_tokens_seen": 78958948, + "step": 814 + }, + { + "epoch": 0.07958545170121235, + "loss": 0.13947483897209167, + "loss_ce": 0.005563694518059492, + "loss_iou": 0.48828125, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 78958948, + "step": 814 + }, + { + "epoch": 0.07968322252639812, + "grad_norm": 4.91491164510346, + "learning_rate": 5e-05, + "loss": 0.1371, + "num_input_tokens_seen": 79055184, + "step": 815 + }, + { + "epoch": 0.07968322252639812, + "loss": 0.0978318303823471, + "loss_ce": 0.009544473141431808, + "loss_iou": 0.453125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 79055184, + "step": 815 + }, + { + "epoch": 0.07978099335158388, + "grad_norm": 7.132850324912511, + "learning_rate": 5e-05, + "loss": 0.1627, + "num_input_tokens_seen": 79151288, + "step": 816 + }, + { + "epoch": 0.07978099335158388, + "loss": 0.13146653771400452, + "loss_ce": 0.009762434288859367, + "loss_iou": 0.306640625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 79151288, + "step": 816 + }, + { + "epoch": 0.07987876417676965, + "grad_norm": 5.729220415018485, + "learning_rate": 5e-05, + "loss": 0.1701, + "num_input_tokens_seen": 79248124, + "step": 817 + }, + { + "epoch": 0.07987876417676965, + "loss": 0.217091903090477, + "loss_ce": 0.011990872211754322, + "loss_iou": 0.373046875, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 79248124, + "step": 817 + }, + { + "epoch": 0.07997653500195542, + "grad_norm": 8.32394697891907, + "learning_rate": 5e-05, + "loss": 0.1456, + "num_input_tokens_seen": 79344748, + "step": 818 + }, + { + "epoch": 0.07997653500195542, + "loss": 0.17335186898708344, + "loss_ce": 0.00828228984028101, + "loss_iou": 0.341796875, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 79344748, + "step": 818 + }, + { + "epoch": 0.08007430582714119, + "grad_norm": 8.498977333283237, + "learning_rate": 5e-05, + "loss": 0.1378, + "num_input_tokens_seen": 79442764, + "step": 819 + }, + { + "epoch": 0.08007430582714119, + "loss": 0.16094201803207397, + "loss_ce": 0.008781366050243378, + "loss_iou": 0.361328125, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 79442764, + "step": 819 + }, + { + "epoch": 0.08017207665232695, + "grad_norm": 9.99909353483024, + "learning_rate": 5e-05, + "loss": 0.1264, + "num_input_tokens_seen": 79539096, + "step": 820 + }, + { + "epoch": 0.08017207665232695, + "loss": 0.12709949910640717, + "loss_ce": 0.005212296731770039, + "loss_iou": 0.4140625, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 79539096, + "step": 820 + }, + { + "epoch": 0.08026984747751272, + "grad_norm": 7.032994159817128, + "learning_rate": 5e-05, + "loss": 0.0925, + "num_input_tokens_seen": 79635300, + "step": 821 + }, + { + "epoch": 0.08026984747751272, + "loss": 0.10421423614025116, + "loss_ce": 0.004665891639888287, + "loss_iou": 0.33984375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 79635300, + "step": 821 + }, + { + "epoch": 0.08036761830269848, + "grad_norm": 5.16992381012603, + "learning_rate": 5e-05, + "loss": 0.0933, + "num_input_tokens_seen": 79732252, + "step": 822 + }, + { + "epoch": 0.08036761830269848, + "loss": 0.07577952742576599, + "loss_ce": 0.004810899030417204, + "loss_iou": 0.279296875, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 79732252, + "step": 822 + }, + { + "epoch": 0.08046538912788424, + "grad_norm": 10.099289588916887, + "learning_rate": 5e-05, + "loss": 0.1454, + "num_input_tokens_seen": 79829620, + "step": 823 + }, + { + "epoch": 0.08046538912788424, + "loss": 0.18681484460830688, + "loss_ce": 0.01310877688229084, + "loss_iou": 0.431640625, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 79829620, + "step": 823 + }, + { + "epoch": 0.08056315995307001, + "grad_norm": 12.193755728338113, + "learning_rate": 5e-05, + "loss": 0.0914, + "num_input_tokens_seen": 79925864, + "step": 824 + }, + { + "epoch": 0.08056315995307001, + "loss": 0.09659738838672638, + "loss_ce": 0.004403790459036827, + "loss_iou": 0.375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 79925864, + "step": 824 + }, + { + "epoch": 0.08066093077825577, + "grad_norm": 8.574441814537423, + "learning_rate": 5e-05, + "loss": 0.1341, + "num_input_tokens_seen": 80022760, + "step": 825 + }, + { + "epoch": 0.08066093077825577, + "loss": 0.18064261972904205, + "loss_ce": 0.0029082493856549263, + "loss_iou": 0.48828125, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 80022760, + "step": 825 + }, + { + "epoch": 0.08075870160344154, + "grad_norm": 15.232949627933344, + "learning_rate": 5e-05, + "loss": 0.1116, + "num_input_tokens_seen": 80119800, + "step": 826 + }, + { + "epoch": 0.08075870160344154, + "loss": 0.1328796148300171, + "loss_ce": 0.0045226761139929295, + "loss_iou": 0.380859375, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 80119800, + "step": 826 + }, + { + "epoch": 0.0808564724286273, + "grad_norm": 5.252499520732791, + "learning_rate": 5e-05, + "loss": 0.1422, + "num_input_tokens_seen": 80216252, + "step": 827 + }, + { + "epoch": 0.0808564724286273, + "loss": 0.1186683252453804, + "loss_ce": 0.007126571610569954, + "loss_iou": 0.39453125, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 80216252, + "step": 827 + }, + { + "epoch": 0.08095424325381306, + "grad_norm": 7.263617805610488, + "learning_rate": 5e-05, + "loss": 0.1134, + "num_input_tokens_seen": 80312376, + "step": 828 + }, + { + "epoch": 0.08095424325381306, + "loss": 0.10970419645309448, + "loss_ce": 0.006341160275042057, + "loss_iou": 0.439453125, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 80312376, + "step": 828 + }, + { + "epoch": 0.08105201407899883, + "grad_norm": 23.192911731035867, + "learning_rate": 5e-05, + "loss": 0.1422, + "num_input_tokens_seen": 80409420, + "step": 829 + }, + { + "epoch": 0.08105201407899883, + "loss": 0.1948758363723755, + "loss_ce": 0.003927350044250488, + "loss_iou": 0.359375, + "loss_num": 0.0380859375, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 80409420, + "step": 829 + }, + { + "epoch": 0.08114978490418459, + "grad_norm": 5.90199109822648, + "learning_rate": 5e-05, + "loss": 0.1487, + "num_input_tokens_seen": 80506312, + "step": 830 + }, + { + "epoch": 0.08114978490418459, + "loss": 0.1551854908466339, + "loss_ce": 0.003696228377521038, + "loss_iou": 0.41796875, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 80506312, + "step": 830 + }, + { + "epoch": 0.08124755572937035, + "grad_norm": 3.579023149403936, + "learning_rate": 5e-05, + "loss": 0.1168, + "num_input_tokens_seen": 80602568, + "step": 831 + }, + { + "epoch": 0.08124755572937035, + "loss": 0.0862211361527443, + "loss_ce": 0.0045560928992927074, + "loss_iou": 0.39453125, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 80602568, + "step": 831 + }, + { + "epoch": 0.08134532655455612, + "grad_norm": 11.867509373143324, + "learning_rate": 5e-05, + "loss": 0.1248, + "num_input_tokens_seen": 80699636, + "step": 832 + }, + { + "epoch": 0.08134532655455612, + "loss": 0.10145359486341476, + "loss_ce": 0.003827866166830063, + "loss_iou": 0.29296875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 80699636, + "step": 832 + }, + { + "epoch": 0.08144309737974188, + "grad_norm": 12.578924160791596, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 80795844, + "step": 833 + }, + { + "epoch": 0.08144309737974188, + "loss": 0.07262219488620758, + "loss_ce": 0.007177246734499931, + "loss_iou": 0.4375, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 80795844, + "step": 833 + }, + { + "epoch": 0.08154086820492765, + "grad_norm": 8.19627602778101, + "learning_rate": 5e-05, + "loss": 0.1619, + "num_input_tokens_seen": 80893900, + "step": 834 + }, + { + "epoch": 0.08154086820492765, + "loss": 0.19520354270935059, + "loss_ce": 0.007093188352882862, + "loss_iou": 0.41796875, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 80893900, + "step": 834 + }, + { + "epoch": 0.08163863903011341, + "grad_norm": 6.0451591562113896, + "learning_rate": 5e-05, + "loss": 0.1101, + "num_input_tokens_seen": 80991276, + "step": 835 + }, + { + "epoch": 0.08163863903011341, + "loss": 0.07448810338973999, + "loss_ce": 0.004511293023824692, + "loss_iou": 0.365234375, + "loss_num": 0.0140380859375, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 80991276, + "step": 835 + }, + { + "epoch": 0.08173640985529917, + "grad_norm": 14.306274114654924, + "learning_rate": 5e-05, + "loss": 0.1525, + "num_input_tokens_seen": 81088180, + "step": 836 + }, + { + "epoch": 0.08173640985529917, + "loss": 0.14084875583648682, + "loss_ce": 0.007242798339575529, + "loss_iou": 0.470703125, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 81088180, + "step": 836 + }, + { + "epoch": 0.08183418068048494, + "grad_norm": 15.797730672414641, + "learning_rate": 5e-05, + "loss": 0.1054, + "num_input_tokens_seen": 81185684, + "step": 837 + }, + { + "epoch": 0.08183418068048494, + "loss": 0.10102343559265137, + "loss_ce": 0.004664185456931591, + "loss_iou": 0.515625, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 81185684, + "step": 837 + }, + { + "epoch": 0.0819319515056707, + "grad_norm": 5.596033060832629, + "learning_rate": 5e-05, + "loss": 0.1067, + "num_input_tokens_seen": 81282200, + "step": 838 + }, + { + "epoch": 0.0819319515056707, + "loss": 0.10338686406612396, + "loss_ce": 0.009362204000353813, + "loss_iou": 0.345703125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 81282200, + "step": 838 + }, + { + "epoch": 0.08202972233085647, + "grad_norm": 6.7942834482041565, + "learning_rate": 5e-05, + "loss": 0.1357, + "num_input_tokens_seen": 81379512, + "step": 839 + }, + { + "epoch": 0.08202972233085647, + "loss": 0.14392243325710297, + "loss_ce": 0.007020584307610989, + "loss_iou": 0.50390625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 81379512, + "step": 839 + }, + { + "epoch": 0.08212749315604223, + "grad_norm": 4.6094006001693515, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 81476752, + "step": 840 + }, + { + "epoch": 0.08212749315604223, + "loss": 0.09846052527427673, + "loss_ce": 0.005351397208869457, + "loss_iou": 0.4296875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 81476752, + "step": 840 + }, + { + "epoch": 0.08222526398122801, + "grad_norm": 6.915593959589993, + "learning_rate": 5e-05, + "loss": 0.1063, + "num_input_tokens_seen": 81573428, + "step": 841 + }, + { + "epoch": 0.08222526398122801, + "loss": 0.10675874352455139, + "loss_ce": 0.004036569967865944, + "loss_iou": 0.466796875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 81573428, + "step": 841 + }, + { + "epoch": 0.08232303480641377, + "grad_norm": 7.091505857296981, + "learning_rate": 5e-05, + "loss": 0.1375, + "num_input_tokens_seen": 81670956, + "step": 842 + }, + { + "epoch": 0.08232303480641377, + "loss": 0.16754461824893951, + "loss_ce": 0.010722415521740913, + "loss_iou": 0.28515625, + "loss_num": 0.03125, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 81670956, + "step": 842 + }, + { + "epoch": 0.08242080563159954, + "grad_norm": 18.07332844404213, + "learning_rate": 5e-05, + "loss": 0.1259, + "num_input_tokens_seen": 81768100, + "step": 843 + }, + { + "epoch": 0.08242080563159954, + "loss": 0.11995004117488861, + "loss_ce": 0.0036170303355902433, + "loss_iou": 0.373046875, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 81768100, + "step": 843 + }, + { + "epoch": 0.0825185764567853, + "grad_norm": 5.237372463189438, + "learning_rate": 5e-05, + "loss": 0.0658, + "num_input_tokens_seen": 81865280, + "step": 844 + }, + { + "epoch": 0.0825185764567853, + "loss": 0.0928104966878891, + "loss_ce": 0.007056106347590685, + "loss_iou": 0.453125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 81865280, + "step": 844 + }, + { + "epoch": 0.08261634728197106, + "grad_norm": 5.220555459882361, + "learning_rate": 5e-05, + "loss": 0.1669, + "num_input_tokens_seen": 81961688, + "step": 845 + }, + { + "epoch": 0.08261634728197106, + "loss": 0.16434930264949799, + "loss_ce": 0.007763613015413284, + "loss_iou": 0.20703125, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 81961688, + "step": 845 + }, + { + "epoch": 0.08271411810715683, + "grad_norm": 9.005673280374582, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 82058300, + "step": 846 + }, + { + "epoch": 0.08271411810715683, + "loss": 0.09021444618701935, + "loss_ce": 0.004917818121612072, + "loss_iou": 0.4453125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 82058300, + "step": 846 + }, + { + "epoch": 0.08281188893234259, + "grad_norm": 13.077895015785595, + "learning_rate": 5e-05, + "loss": 0.1473, + "num_input_tokens_seen": 82155028, + "step": 847 + }, + { + "epoch": 0.08281188893234259, + "loss": 0.16341567039489746, + "loss_ce": 0.01241468545049429, + "loss_iou": 0.283203125, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 82155028, + "step": 847 + }, + { + "epoch": 0.08290965975752836, + "grad_norm": 7.723174679663244, + "learning_rate": 5e-05, + "loss": 0.1148, + "num_input_tokens_seen": 82252440, + "step": 848 + }, + { + "epoch": 0.08290965975752836, + "loss": 0.1197693943977356, + "loss_ce": 0.010501200333237648, + "loss_iou": 0.361328125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 82252440, + "step": 848 + }, + { + "epoch": 0.08300743058271412, + "grad_norm": 5.754475555700876, + "learning_rate": 5e-05, + "loss": 0.0973, + "num_input_tokens_seen": 82349152, + "step": 849 + }, + { + "epoch": 0.08300743058271412, + "loss": 0.10072924196720123, + "loss_ce": 0.007955804467201233, + "loss_iou": 0.390625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 82349152, + "step": 849 + }, + { + "epoch": 0.08310520140789988, + "grad_norm": 7.714170715987089, + "learning_rate": 5e-05, + "loss": 0.1229, + "num_input_tokens_seen": 82445660, + "step": 850 + }, + { + "epoch": 0.08310520140789988, + "loss": 0.11354352533817291, + "loss_ce": 0.00515271769836545, + "loss_iou": 0.294921875, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 82445660, + "step": 850 + }, + { + "epoch": 0.08320297223308565, + "grad_norm": 7.870281316970052, + "learning_rate": 5e-05, + "loss": 0.1235, + "num_input_tokens_seen": 82543012, + "step": 851 + }, + { + "epoch": 0.08320297223308565, + "loss": 0.08999057114124298, + "loss_ce": 0.004297209437936544, + "loss_iou": 0.34375, + "loss_num": 0.01708984375, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 82543012, + "step": 851 + }, + { + "epoch": 0.08330074305827141, + "grad_norm": 43.000998638876226, + "learning_rate": 5e-05, + "loss": 0.1318, + "num_input_tokens_seen": 82639036, + "step": 852 + }, + { + "epoch": 0.08330074305827141, + "loss": 0.13513585925102234, + "loss_ce": 0.006687371991574764, + "loss_iou": 0.447265625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 82639036, + "step": 852 + }, + { + "epoch": 0.08339851388345718, + "grad_norm": 11.435422577098683, + "learning_rate": 5e-05, + "loss": 0.1488, + "num_input_tokens_seen": 82736012, + "step": 853 + }, + { + "epoch": 0.08339851388345718, + "loss": 0.175950288772583, + "loss_ce": 0.015481246635317802, + "loss_iou": 0.30078125, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 82736012, + "step": 853 + }, + { + "epoch": 0.08349628470864294, + "grad_norm": 9.335216223860709, + "learning_rate": 5e-05, + "loss": 0.1415, + "num_input_tokens_seen": 82833536, + "step": 854 + }, + { + "epoch": 0.08349628470864294, + "loss": 0.16788271069526672, + "loss_ce": 0.01834658347070217, + "loss_iou": 0.421875, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 82833536, + "step": 854 + }, + { + "epoch": 0.0835940555338287, + "grad_norm": 14.815359550992769, + "learning_rate": 5e-05, + "loss": 0.1046, + "num_input_tokens_seen": 82930276, + "step": 855 + }, + { + "epoch": 0.0835940555338287, + "loss": 0.09165146946907043, + "loss_ce": 0.014106299728155136, + "loss_iou": 0.43359375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 82930276, + "step": 855 + }, + { + "epoch": 0.08369182635901447, + "grad_norm": 6.565802660693666, + "learning_rate": 5e-05, + "loss": 0.1163, + "num_input_tokens_seen": 83027220, + "step": 856 + }, + { + "epoch": 0.08369182635901447, + "loss": 0.08573964238166809, + "loss_ce": 0.006424454506486654, + "loss_iou": 0.46484375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 83027220, + "step": 856 + }, + { + "epoch": 0.08378959718420023, + "grad_norm": 15.08814918681179, + "learning_rate": 5e-05, + "loss": 0.1349, + "num_input_tokens_seen": 83123524, + "step": 857 + }, + { + "epoch": 0.08378959718420023, + "loss": 0.10407854616641998, + "loss_ce": 0.0053236656822264194, + "loss_iou": 0.212890625, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 83123524, + "step": 857 + }, + { + "epoch": 0.083887368009386, + "grad_norm": 11.19135128441448, + "learning_rate": 5e-05, + "loss": 0.1453, + "num_input_tokens_seen": 83220428, + "step": 858 + }, + { + "epoch": 0.083887368009386, + "loss": 0.1489315629005432, + "loss_ce": 0.005773609504103661, + "loss_iou": 0.376953125, + "loss_num": 0.028564453125, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 83220428, + "step": 858 + }, + { + "epoch": 0.08398513883457176, + "grad_norm": 15.532145915686737, + "learning_rate": 5e-05, + "loss": 0.1469, + "num_input_tokens_seen": 83318680, + "step": 859 + }, + { + "epoch": 0.08398513883457176, + "loss": 0.14387118816375732, + "loss_ce": 0.00544345285743475, + "loss_iou": 0.671875, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 83318680, + "step": 859 + }, + { + "epoch": 0.08408290965975752, + "grad_norm": 12.403955509836305, + "learning_rate": 5e-05, + "loss": 0.1205, + "num_input_tokens_seen": 83414932, + "step": 860 + }, + { + "epoch": 0.08408290965975752, + "loss": 0.09754766523838043, + "loss_ce": 0.007673397194594145, + "loss_iou": 0.39453125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 83414932, + "step": 860 + }, + { + "epoch": 0.08418068048494329, + "grad_norm": 7.254592599471678, + "learning_rate": 5e-05, + "loss": 0.1182, + "num_input_tokens_seen": 83512136, + "step": 861 + }, + { + "epoch": 0.08418068048494329, + "loss": 0.114432692527771, + "loss_ce": 0.003531813621520996, + "loss_iou": 0.435546875, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 83512136, + "step": 861 + }, + { + "epoch": 0.08427845131012905, + "grad_norm": 4.743794239579672, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 83609032, + "step": 862 + }, + { + "epoch": 0.08427845131012905, + "loss": 0.124659463763237, + "loss_ce": 0.0052746981382369995, + "loss_iou": 0.376953125, + "loss_num": 0.02392578125, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 83609032, + "step": 862 + }, + { + "epoch": 0.08437622213531482, + "grad_norm": 5.502290078018058, + "learning_rate": 5e-05, + "loss": 0.0703, + "num_input_tokens_seen": 83706280, + "step": 863 + }, + { + "epoch": 0.08437622213531482, + "loss": 0.08361081779003143, + "loss_ce": 0.003761576721444726, + "loss_iou": 0.40234375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 83706280, + "step": 863 + }, + { + "epoch": 0.0844739929605006, + "grad_norm": 8.483886519319631, + "learning_rate": 5e-05, + "loss": 0.104, + "num_input_tokens_seen": 83803160, + "step": 864 + }, + { + "epoch": 0.0844739929605006, + "loss": 0.11865629255771637, + "loss_ce": 0.0066873012110590935, + "loss_iou": 0.40625, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 83803160, + "step": 864 + }, + { + "epoch": 0.08457176378568636, + "grad_norm": 13.752152376739943, + "learning_rate": 5e-05, + "loss": 0.1254, + "num_input_tokens_seen": 83900396, + "step": 865 + }, + { + "epoch": 0.08457176378568636, + "loss": 0.10899704694747925, + "loss_ce": 0.003192601492628455, + "loss_iou": 0.376953125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 83900396, + "step": 865 + }, + { + "epoch": 0.08466953461087212, + "grad_norm": 11.59657450512771, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 83996696, + "step": 866 + }, + { + "epoch": 0.08466953461087212, + "loss": 0.09064459800720215, + "loss_ce": 0.005866764113306999, + "loss_iou": 0.37890625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 83996696, + "step": 866 + }, + { + "epoch": 0.08476730543605789, + "grad_norm": 7.706290682950301, + "learning_rate": 5e-05, + "loss": 0.116, + "num_input_tokens_seen": 84094504, + "step": 867 + }, + { + "epoch": 0.08476730543605789, + "loss": 0.10851354151964188, + "loss_ce": 0.005303090903908014, + "loss_iou": 0.451171875, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 84094504, + "step": 867 + }, + { + "epoch": 0.08486507626124365, + "grad_norm": 6.499870947553273, + "learning_rate": 5e-05, + "loss": 0.0993, + "num_input_tokens_seen": 84191472, + "step": 868 + }, + { + "epoch": 0.08486507626124365, + "loss": 0.08403760194778442, + "loss_ce": 0.003730595577508211, + "loss_iou": 0.43359375, + "loss_num": 0.01611328125, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 84191472, + "step": 868 + }, + { + "epoch": 0.08496284708642941, + "grad_norm": 10.312967155262735, + "learning_rate": 5e-05, + "loss": 0.1223, + "num_input_tokens_seen": 84288308, + "step": 869 + }, + { + "epoch": 0.08496284708642941, + "loss": 0.11003363132476807, + "loss_ce": 0.012163754552602768, + "loss_iou": 0.390625, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 84288308, + "step": 869 + }, + { + "epoch": 0.08506061791161518, + "grad_norm": 7.152816073957788, + "learning_rate": 5e-05, + "loss": 0.1413, + "num_input_tokens_seen": 84385308, + "step": 870 + }, + { + "epoch": 0.08506061791161518, + "loss": 0.13476799428462982, + "loss_ce": 0.008028492331504822, + "loss_iou": 0.353515625, + "loss_num": 0.0252685546875, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 84385308, + "step": 870 + }, + { + "epoch": 0.08515838873680094, + "grad_norm": 7.295917616479822, + "learning_rate": 5e-05, + "loss": 0.1592, + "num_input_tokens_seen": 84481180, + "step": 871 + }, + { + "epoch": 0.08515838873680094, + "loss": 0.12281209230422974, + "loss_ce": 0.004098714794963598, + "loss_iou": 0.376953125, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 84481180, + "step": 871 + }, + { + "epoch": 0.0852561595619867, + "grad_norm": 11.423301196230152, + "learning_rate": 5e-05, + "loss": 0.1363, + "num_input_tokens_seen": 84578264, + "step": 872 + }, + { + "epoch": 0.0852561595619867, + "loss": 0.13280229270458221, + "loss_ce": 0.0041172923520207405, + "loss_iou": 0.330078125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 84578264, + "step": 872 + }, + { + "epoch": 0.08535393038717247, + "grad_norm": 7.588892020272429, + "learning_rate": 5e-05, + "loss": 0.0964, + "num_input_tokens_seen": 84676052, + "step": 873 + }, + { + "epoch": 0.08535393038717247, + "loss": 0.09100263565778732, + "loss_ce": 0.002669507171958685, + "loss_iou": 0.333984375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 84676052, + "step": 873 + }, + { + "epoch": 0.08545170121235823, + "grad_norm": 6.815326970348428, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 84773620, + "step": 874 + }, + { + "epoch": 0.08545170121235823, + "loss": 0.08935859799385071, + "loss_ce": 0.006030351854860783, + "loss_iou": 0.419921875, + "loss_num": 0.0166015625, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 84773620, + "step": 874 + }, + { + "epoch": 0.085549472037544, + "grad_norm": 8.436912562591106, + "learning_rate": 5e-05, + "loss": 0.136, + "num_input_tokens_seen": 84870536, + "step": 875 + }, + { + "epoch": 0.085549472037544, + "loss": 0.13760273158550262, + "loss_ce": 0.0058583482168614864, + "loss_iou": 0.404296875, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 84870536, + "step": 875 + }, + { + "epoch": 0.08564724286272976, + "grad_norm": 8.996475119407561, + "learning_rate": 5e-05, + "loss": 0.1335, + "num_input_tokens_seen": 84967772, + "step": 876 + }, + { + "epoch": 0.08564724286272976, + "loss": 0.11684684455394745, + "loss_ce": 0.0038707624189555645, + "loss_iou": 0.3984375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 84967772, + "step": 876 + }, + { + "epoch": 0.08574501368791553, + "grad_norm": 6.343741074384087, + "learning_rate": 5e-05, + "loss": 0.1043, + "num_input_tokens_seen": 85063200, + "step": 877 + }, + { + "epoch": 0.08574501368791553, + "loss": 0.09969726204872131, + "loss_ce": 0.009334715083241463, + "loss_iou": 0.279296875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 85063200, + "step": 877 + }, + { + "epoch": 0.08584278451310129, + "grad_norm": 9.651599329116246, + "learning_rate": 5e-05, + "loss": 0.1029, + "num_input_tokens_seen": 85159896, + "step": 878 + }, + { + "epoch": 0.08584278451310129, + "loss": 0.1126350462436676, + "loss_ce": 0.00651779118925333, + "loss_iou": 0.34375, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 85159896, + "step": 878 + }, + { + "epoch": 0.08594055533828705, + "grad_norm": 7.119567865325211, + "learning_rate": 5e-05, + "loss": 0.1371, + "num_input_tokens_seen": 85257028, + "step": 879 + }, + { + "epoch": 0.08594055533828705, + "loss": 0.14546240866184235, + "loss_ce": 0.013748539611697197, + "loss_iou": 0.408203125, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 85257028, + "step": 879 + }, + { + "epoch": 0.08603832616347282, + "grad_norm": 11.248264382528202, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 85354936, + "step": 880 + }, + { + "epoch": 0.08603832616347282, + "loss": 0.0759134590625763, + "loss_ce": 0.005074536893516779, + "loss_iou": 0.51171875, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 85354936, + "step": 880 + }, + { + "epoch": 0.08613609698865858, + "grad_norm": 20.09362971534949, + "learning_rate": 5e-05, + "loss": 0.1332, + "num_input_tokens_seen": 85452516, + "step": 881 + }, + { + "epoch": 0.08613609698865858, + "loss": 0.13187456130981445, + "loss_ce": 0.003517632372677326, + "loss_iou": 0.50390625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 85452516, + "step": 881 + }, + { + "epoch": 0.08623386781384434, + "grad_norm": 8.383649600479252, + "learning_rate": 5e-05, + "loss": 0.1199, + "num_input_tokens_seen": 85549948, + "step": 882 + }, + { + "epoch": 0.08623386781384434, + "loss": 0.10743963718414307, + "loss_ce": 0.003496758872643113, + "loss_iou": 0.490234375, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 85549948, + "step": 882 + }, + { + "epoch": 0.08633163863903011, + "grad_norm": 6.388556029805833, + "learning_rate": 5e-05, + "loss": 0.1453, + "num_input_tokens_seen": 85647612, + "step": 883 + }, + { + "epoch": 0.08633163863903011, + "loss": 0.13544490933418274, + "loss_ce": 0.006477617658674717, + "loss_iou": 0.4765625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 85647612, + "step": 883 + }, + { + "epoch": 0.08642940946421587, + "grad_norm": 9.109290917831704, + "learning_rate": 5e-05, + "loss": 0.1072, + "num_input_tokens_seen": 85744856, + "step": 884 + }, + { + "epoch": 0.08642940946421587, + "loss": 0.11389181017875671, + "loss_ce": 0.006958221085369587, + "loss_iou": 0.330078125, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 85744856, + "step": 884 + }, + { + "epoch": 0.08652718028940164, + "grad_norm": 20.887785346373906, + "learning_rate": 5e-05, + "loss": 0.1232, + "num_input_tokens_seen": 85841820, + "step": 885 + }, + { + "epoch": 0.08652718028940164, + "loss": 0.14026883244514465, + "loss_ce": 0.007151154801249504, + "loss_iou": 0.431640625, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 85841820, + "step": 885 + }, + { + "epoch": 0.0866249511145874, + "grad_norm": 9.492706815400831, + "learning_rate": 5e-05, + "loss": 0.1153, + "num_input_tokens_seen": 85938712, + "step": 886 + }, + { + "epoch": 0.0866249511145874, + "loss": 0.11569681018590927, + "loss_ce": 0.0067185405641794205, + "loss_iou": 0.419921875, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 85938712, + "step": 886 + }, + { + "epoch": 0.08672272193977318, + "grad_norm": 4.485220357518862, + "learning_rate": 5e-05, + "loss": 0.1173, + "num_input_tokens_seen": 86035616, + "step": 887 + }, + { + "epoch": 0.08672272193977318, + "loss": 0.07229511439800262, + "loss_ce": 0.005034366622567177, + "loss_iou": 0.4375, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 86035616, + "step": 887 + }, + { + "epoch": 0.08682049276495894, + "grad_norm": 10.865130185325926, + "learning_rate": 5e-05, + "loss": 0.1343, + "num_input_tokens_seen": 86133240, + "step": 888 + }, + { + "epoch": 0.08682049276495894, + "loss": 0.15544912219047546, + "loss_ce": 0.009117345325648785, + "loss_iou": 0.451171875, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 86133240, + "step": 888 + }, + { + "epoch": 0.0869182635901447, + "grad_norm": 13.683657858561949, + "learning_rate": 5e-05, + "loss": 0.1144, + "num_input_tokens_seen": 86229788, + "step": 889 + }, + { + "epoch": 0.0869182635901447, + "loss": 0.10345786809921265, + "loss_ce": 0.008273540064692497, + "loss_iou": 0.189453125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 86229788, + "step": 889 + }, + { + "epoch": 0.08701603441533047, + "grad_norm": 8.638139168987133, + "learning_rate": 5e-05, + "loss": 0.1265, + "num_input_tokens_seen": 86326420, + "step": 890 + }, + { + "epoch": 0.08701603441533047, + "loss": 0.11787749826908112, + "loss_ce": 0.0056719910353422165, + "loss_iou": 0.37890625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 86326420, + "step": 890 + }, + { + "epoch": 0.08711380524051623, + "grad_norm": 6.11128785866721, + "learning_rate": 5e-05, + "loss": 0.141, + "num_input_tokens_seen": 86423408, + "step": 891 + }, + { + "epoch": 0.08711380524051623, + "loss": 0.11744146049022675, + "loss_ce": 0.009043024852871895, + "loss_iou": 0.36328125, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 86423408, + "step": 891 + }, + { + "epoch": 0.087211576065702, + "grad_norm": 11.701847016343644, + "learning_rate": 5e-05, + "loss": 0.1323, + "num_input_tokens_seen": 86520452, + "step": 892 + }, + { + "epoch": 0.087211576065702, + "loss": 0.11946950852870941, + "loss_ce": 0.00621876772493124, + "loss_iou": 0.494140625, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 86520452, + "step": 892 + }, + { + "epoch": 0.08730934689088776, + "grad_norm": 13.39965417538291, + "learning_rate": 5e-05, + "loss": 0.1205, + "num_input_tokens_seen": 86617616, + "step": 893 + }, + { + "epoch": 0.08730934689088776, + "loss": 0.10722886025905609, + "loss_ce": 0.003347026417031884, + "loss_iou": 0.373046875, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 86617616, + "step": 893 + }, + { + "epoch": 0.08740711771607353, + "grad_norm": 14.943199771326437, + "learning_rate": 5e-05, + "loss": 0.1621, + "num_input_tokens_seen": 86714128, + "step": 894 + }, + { + "epoch": 0.08740711771607353, + "loss": 0.21032270789146423, + "loss_ce": 0.0040238662622869015, + "loss_iou": 0.30078125, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 86714128, + "step": 894 + }, + { + "epoch": 0.08750488854125929, + "grad_norm": 5.710651799324401, + "learning_rate": 5e-05, + "loss": 0.1364, + "num_input_tokens_seen": 86810660, + "step": 895 + }, + { + "epoch": 0.08750488854125929, + "loss": 0.1094869077205658, + "loss_ce": 0.008382173255085945, + "loss_iou": 0.4296875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 86810660, + "step": 895 + }, + { + "epoch": 0.08760265936644505, + "grad_norm": 3.6009694726329857, + "learning_rate": 5e-05, + "loss": 0.1136, + "num_input_tokens_seen": 86907372, + "step": 896 + }, + { + "epoch": 0.08760265936644505, + "loss": 0.10124780237674713, + "loss_ce": 0.005132689140737057, + "loss_iou": 0.435546875, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 86907372, + "step": 896 + }, + { + "epoch": 0.08770043019163082, + "grad_norm": 6.4323720330858425, + "learning_rate": 5e-05, + "loss": 0.1611, + "num_input_tokens_seen": 87005472, + "step": 897 + }, + { + "epoch": 0.08770043019163082, + "loss": 0.1498754322528839, + "loss_ce": 0.005283161532133818, + "loss_iou": 0.61328125, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 87005472, + "step": 897 + }, + { + "epoch": 0.08779820101681658, + "grad_norm": 5.381840382429433, + "learning_rate": 5e-05, + "loss": 0.1035, + "num_input_tokens_seen": 87102228, + "step": 898 + }, + { + "epoch": 0.08779820101681658, + "loss": 0.11061285436153412, + "loss_ce": 0.005083071067929268, + "loss_iou": 0.345703125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 87102228, + "step": 898 + }, + { + "epoch": 0.08789597184200235, + "grad_norm": 8.814690159843936, + "learning_rate": 5e-05, + "loss": 0.1016, + "num_input_tokens_seen": 87199328, + "step": 899 + }, + { + "epoch": 0.08789597184200235, + "loss": 0.09741802513599396, + "loss_ce": 0.004034245852380991, + "loss_iou": 0.390625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 87199328, + "step": 899 + }, + { + "epoch": 0.08799374266718811, + "grad_norm": 15.99148413797537, + "learning_rate": 5e-05, + "loss": 0.0742, + "num_input_tokens_seen": 87295844, + "step": 900 + }, + { + "epoch": 0.08799374266718811, + "loss": 0.052055347710847855, + "loss_ce": 0.004547108896076679, + "loss_iou": 0.26171875, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 87295844, + "step": 900 + }, + { + "epoch": 0.08809151349237387, + "grad_norm": 4.818969682275044, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 87391736, + "step": 901 + }, + { + "epoch": 0.08809151349237387, + "loss": 0.07552629709243774, + "loss_ce": 0.008631765842437744, + "loss_iou": 0.2734375, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 87391736, + "step": 901 + }, + { + "epoch": 0.08818928431755964, + "grad_norm": 18.52181846817964, + "learning_rate": 5e-05, + "loss": 0.1467, + "num_input_tokens_seen": 87488372, + "step": 902 + }, + { + "epoch": 0.08818928431755964, + "loss": 0.10341969132423401, + "loss_ce": 0.0059923226945102215, + "loss_iou": 0.357421875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 87488372, + "step": 902 + }, + { + "epoch": 0.0882870551427454, + "grad_norm": 9.358085650556646, + "learning_rate": 5e-05, + "loss": 0.1114, + "num_input_tokens_seen": 87584892, + "step": 903 + }, + { + "epoch": 0.0882870551427454, + "loss": 0.1474933922290802, + "loss_ce": 0.008531596511602402, + "loss_iou": 0.32421875, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 87584892, + "step": 903 + }, + { + "epoch": 0.08838482596793117, + "grad_norm": 7.294474457991304, + "learning_rate": 5e-05, + "loss": 0.0959, + "num_input_tokens_seen": 87682420, + "step": 904 + }, + { + "epoch": 0.08838482596793117, + "loss": 0.12310962378978729, + "loss_ce": 0.008241463452577591, + "loss_iou": 0.349609375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 87682420, + "step": 904 + }, + { + "epoch": 0.08848259679311693, + "grad_norm": 7.390447665625824, + "learning_rate": 5e-05, + "loss": 0.1096, + "num_input_tokens_seen": 87779060, + "step": 905 + }, + { + "epoch": 0.08848259679311693, + "loss": 0.08537651598453522, + "loss_ce": 0.005023733712732792, + "loss_iou": 0.451171875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 87779060, + "step": 905 + }, + { + "epoch": 0.0885803676183027, + "grad_norm": 26.303348995802207, + "learning_rate": 5e-05, + "loss": 0.1554, + "num_input_tokens_seen": 87876600, + "step": 906 + }, + { + "epoch": 0.0885803676183027, + "loss": 0.1255778968334198, + "loss_ce": 0.0036296576727181673, + "loss_iou": 0.4921875, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 87876600, + "step": 906 + }, + { + "epoch": 0.08867813844348846, + "grad_norm": 4.317553602116972, + "learning_rate": 5e-05, + "loss": 0.1398, + "num_input_tokens_seen": 87973292, + "step": 907 + }, + { + "epoch": 0.08867813844348846, + "loss": 0.1587638258934021, + "loss_ce": 0.0036734873428940773, + "loss_iou": 0.365234375, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 87973292, + "step": 907 + }, + { + "epoch": 0.08877590926867422, + "grad_norm": 7.837368205806641, + "learning_rate": 5e-05, + "loss": 0.1787, + "num_input_tokens_seen": 88070772, + "step": 908 + }, + { + "epoch": 0.08877590926867422, + "loss": 0.21278604865074158, + "loss_ce": 0.008867593482136726, + "loss_iou": 0.326171875, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 88070772, + "step": 908 + }, + { + "epoch": 0.08887368009385999, + "grad_norm": 10.278171414087943, + "learning_rate": 5e-05, + "loss": 0.0856, + "num_input_tokens_seen": 88167432, + "step": 909 + }, + { + "epoch": 0.08887368009385999, + "loss": 0.09796654433012009, + "loss_ce": 0.0047811176627874374, + "loss_iou": 0.4453125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 88167432, + "step": 909 + }, + { + "epoch": 0.08897145091904576, + "grad_norm": 8.37005377411775, + "learning_rate": 5e-05, + "loss": 0.1488, + "num_input_tokens_seen": 88264888, + "step": 910 + }, + { + "epoch": 0.08897145091904576, + "loss": 0.14165037870407104, + "loss_ce": 0.005633527413010597, + "loss_iou": 0.369140625, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 88264888, + "step": 910 + }, + { + "epoch": 0.08906922174423153, + "grad_norm": 6.503684455735759, + "learning_rate": 5e-05, + "loss": 0.1126, + "num_input_tokens_seen": 88361016, + "step": 911 + }, + { + "epoch": 0.08906922174423153, + "loss": 0.1247580498456955, + "loss_ce": 0.007936755195260048, + "loss_iou": 0.453125, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 88361016, + "step": 911 + }, + { + "epoch": 0.08916699256941729, + "grad_norm": 6.90738767530963, + "learning_rate": 5e-05, + "loss": 0.1201, + "num_input_tokens_seen": 88458968, + "step": 912 + }, + { + "epoch": 0.08916699256941729, + "loss": 0.11045897006988525, + "loss_ce": 0.006210923194885254, + "loss_iou": 0.5078125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 88458968, + "step": 912 + }, + { + "epoch": 0.08926476339460306, + "grad_norm": 23.715236001865694, + "learning_rate": 5e-05, + "loss": 0.1441, + "num_input_tokens_seen": 88555916, + "step": 913 + }, + { + "epoch": 0.08926476339460306, + "loss": 0.1380654126405716, + "loss_ce": 0.005985339172184467, + "loss_iou": 0.51953125, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 88555916, + "step": 913 + }, + { + "epoch": 0.08936253421978882, + "grad_norm": 5.045212785992016, + "learning_rate": 5e-05, + "loss": 0.1475, + "num_input_tokens_seen": 88653696, + "step": 914 + }, + { + "epoch": 0.08936253421978882, + "loss": 0.1649712175130844, + "loss_ce": 0.007744654081761837, + "loss_iou": 0.40234375, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 88653696, + "step": 914 + }, + { + "epoch": 0.08946030504497458, + "grad_norm": 6.198944700509711, + "learning_rate": 5e-05, + "loss": 0.1044, + "num_input_tokens_seen": 88751176, + "step": 915 + }, + { + "epoch": 0.08946030504497458, + "loss": 0.09565235674381256, + "loss_ce": 0.006876722909510136, + "loss_iou": 0.400390625, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 88751176, + "step": 915 + }, + { + "epoch": 0.08955807587016035, + "grad_norm": 21.093201038049845, + "learning_rate": 5e-05, + "loss": 0.1983, + "num_input_tokens_seen": 88848232, + "step": 916 + }, + { + "epoch": 0.08955807587016035, + "loss": 0.21665498614311218, + "loss_ce": 0.00818941742181778, + "loss_iou": 0.439453125, + "loss_num": 0.041748046875, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 88848232, + "step": 916 + }, + { + "epoch": 0.08965584669534611, + "grad_norm": 5.872521617350772, + "learning_rate": 5e-05, + "loss": 0.0999, + "num_input_tokens_seen": 88946156, + "step": 917 + }, + { + "epoch": 0.08965584669534611, + "loss": 0.13495245575904846, + "loss_ce": 0.0022009829990565777, + "loss_iou": 0.42578125, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 88946156, + "step": 917 + }, + { + "epoch": 0.08975361752053188, + "grad_norm": 6.387253461488091, + "learning_rate": 5e-05, + "loss": 0.1304, + "num_input_tokens_seen": 89043332, + "step": 918 + }, + { + "epoch": 0.08975361752053188, + "loss": 0.11070594936609268, + "loss_ce": 0.00905190221965313, + "loss_iou": 0.27734375, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 89043332, + "step": 918 + }, + { + "epoch": 0.08985138834571764, + "grad_norm": 3.1752260697216026, + "learning_rate": 5e-05, + "loss": 0.1319, + "num_input_tokens_seen": 89139792, + "step": 919 + }, + { + "epoch": 0.08985138834571764, + "loss": 0.1504538357257843, + "loss_ce": 0.0052512027323246, + "loss_iou": 0.357421875, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 89139792, + "step": 919 + }, + { + "epoch": 0.0899491591709034, + "grad_norm": 10.228050845473652, + "learning_rate": 5e-05, + "loss": 0.1408, + "num_input_tokens_seen": 89236556, + "step": 920 + }, + { + "epoch": 0.0899491591709034, + "loss": 0.1228858157992363, + "loss_ce": 0.008521187119185925, + "loss_iou": 0.29296875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 89236556, + "step": 920 + }, + { + "epoch": 0.09004692999608917, + "grad_norm": 10.300591026203346, + "learning_rate": 5e-05, + "loss": 0.0968, + "num_input_tokens_seen": 89333476, + "step": 921 + }, + { + "epoch": 0.09004692999608917, + "loss": 0.08413834869861603, + "loss_ce": 0.007050946354866028, + "loss_iou": 0.3671875, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 89333476, + "step": 921 + }, + { + "epoch": 0.09014470082127493, + "grad_norm": 11.01308051975482, + "learning_rate": 5e-05, + "loss": 0.1265, + "num_input_tokens_seen": 89430624, + "step": 922 + }, + { + "epoch": 0.09014470082127493, + "loss": 0.12231782078742981, + "loss_ce": 0.009433296509087086, + "loss_iou": 0.380859375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 89430624, + "step": 922 + }, + { + "epoch": 0.0902424716464607, + "grad_norm": 12.099715453515358, + "learning_rate": 5e-05, + "loss": 0.1419, + "num_input_tokens_seen": 89527648, + "step": 923 + }, + { + "epoch": 0.0902424716464607, + "loss": 0.17302662134170532, + "loss_ce": 0.009116701781749725, + "loss_iou": 0.470703125, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 89527648, + "step": 923 + }, + { + "epoch": 0.09034024247164646, + "grad_norm": 8.210885498114543, + "learning_rate": 5e-05, + "loss": 0.123, + "num_input_tokens_seen": 89625344, + "step": 924 + }, + { + "epoch": 0.09034024247164646, + "loss": 0.11610198765993118, + "loss_ce": 0.005140069872140884, + "loss_iou": 0.39453125, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 89625344, + "step": 924 + }, + { + "epoch": 0.09043801329683222, + "grad_norm": 3.266361204289022, + "learning_rate": 5e-05, + "loss": 0.1628, + "num_input_tokens_seen": 89722324, + "step": 925 + }, + { + "epoch": 0.09043801329683222, + "loss": 0.10900717973709106, + "loss_ce": 0.004087749868631363, + "loss_iou": 0.34765625, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 89722324, + "step": 925 + }, + { + "epoch": 0.09053578412201799, + "grad_norm": 10.36728452959956, + "learning_rate": 5e-05, + "loss": 0.1251, + "num_input_tokens_seen": 89818276, + "step": 926 + }, + { + "epoch": 0.09053578412201799, + "loss": 0.07753605395555496, + "loss_ce": 0.0025257556699216366, + "loss_iou": 0.40625, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 89818276, + "step": 926 + }, + { + "epoch": 0.09063355494720375, + "grad_norm": 6.5752033086244746, + "learning_rate": 5e-05, + "loss": 0.1137, + "num_input_tokens_seen": 89915384, + "step": 927 + }, + { + "epoch": 0.09063355494720375, + "loss": 0.149050772190094, + "loss_ce": 0.0037871049717068672, + "loss_iou": 0.51953125, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 89915384, + "step": 927 + }, + { + "epoch": 0.09073132577238952, + "grad_norm": 26.05539929640776, + "learning_rate": 5e-05, + "loss": 0.112, + "num_input_tokens_seen": 90011724, + "step": 928 + }, + { + "epoch": 0.09073132577238952, + "loss": 0.09091906249523163, + "loss_ce": 0.0033755693584680557, + "loss_iou": 0.37109375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 90011724, + "step": 928 + }, + { + "epoch": 0.09082909659757528, + "grad_norm": 3.5627698497272533, + "learning_rate": 5e-05, + "loss": 0.1411, + "num_input_tokens_seen": 90108464, + "step": 929 + }, + { + "epoch": 0.09082909659757528, + "loss": 0.10896699875593185, + "loss_ce": 0.005878619384020567, + "loss_iou": 0.55078125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 90108464, + "step": 929 + }, + { + "epoch": 0.09092686742276104, + "grad_norm": 12.850936395542984, + "learning_rate": 5e-05, + "loss": 0.148, + "num_input_tokens_seen": 90206640, + "step": 930 + }, + { + "epoch": 0.09092686742276104, + "loss": 0.13991102576255798, + "loss_ce": 0.009478900581598282, + "loss_iou": 0.4140625, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 90206640, + "step": 930 + }, + { + "epoch": 0.0910246382479468, + "grad_norm": 6.202196191096953, + "learning_rate": 5e-05, + "loss": 0.1319, + "num_input_tokens_seen": 90304012, + "step": 931 + }, + { + "epoch": 0.0910246382479468, + "loss": 0.1507640779018402, + "loss_ce": 0.007087322883307934, + "loss_iou": 0.326171875, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 90304012, + "step": 931 + }, + { + "epoch": 0.09112240907313257, + "grad_norm": 3.058267452732514, + "learning_rate": 5e-05, + "loss": 0.1145, + "num_input_tokens_seen": 90401376, + "step": 932 + }, + { + "epoch": 0.09112240907313257, + "loss": 0.12865833938121796, + "loss_ce": 0.010433249175548553, + "loss_iou": 0.3671875, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 90401376, + "step": 932 + }, + { + "epoch": 0.09122017989831835, + "grad_norm": 9.626716663901412, + "learning_rate": 5e-05, + "loss": 0.1024, + "num_input_tokens_seen": 90496928, + "step": 933 + }, + { + "epoch": 0.09122017989831835, + "loss": 0.11615574359893799, + "loss_ce": 0.007421610876917839, + "loss_iou": 0.416015625, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 90496928, + "step": 933 + }, + { + "epoch": 0.09131795072350411, + "grad_norm": 15.189319895746602, + "learning_rate": 5e-05, + "loss": 0.1107, + "num_input_tokens_seen": 90594452, + "step": 934 + }, + { + "epoch": 0.09131795072350411, + "loss": 0.09343735128641129, + "loss_ce": 0.003372343722730875, + "loss_iou": 0.373046875, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 90594452, + "step": 934 + }, + { + "epoch": 0.09141572154868988, + "grad_norm": 9.103362508149202, + "learning_rate": 5e-05, + "loss": 0.1111, + "num_input_tokens_seen": 90691396, + "step": 935 + }, + { + "epoch": 0.09141572154868988, + "loss": 0.10204434394836426, + "loss_ce": 0.005074744112789631, + "loss_iou": 0.375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 90691396, + "step": 935 + }, + { + "epoch": 0.09151349237387564, + "grad_norm": 9.944594495852236, + "learning_rate": 5e-05, + "loss": 0.1042, + "num_input_tokens_seen": 90789564, + "step": 936 + }, + { + "epoch": 0.09151349237387564, + "loss": 0.10750700533390045, + "loss_ce": 0.0025570495054125786, + "loss_iou": 0.5078125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 90789564, + "step": 936 + }, + { + "epoch": 0.0916112631990614, + "grad_norm": 6.656516009802005, + "learning_rate": 5e-05, + "loss": 0.1305, + "num_input_tokens_seen": 90885868, + "step": 937 + }, + { + "epoch": 0.0916112631990614, + "loss": 0.11826390027999878, + "loss_ce": 0.0031821089796721935, + "loss_iou": 0.26953125, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 90885868, + "step": 937 + }, + { + "epoch": 0.09170903402424717, + "grad_norm": 5.655098383080383, + "learning_rate": 5e-05, + "loss": 0.1227, + "num_input_tokens_seen": 90982176, + "step": 938 + }, + { + "epoch": 0.09170903402424717, + "loss": 0.08786371350288391, + "loss_ce": 0.005862983409315348, + "loss_iou": 0.322265625, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 90982176, + "step": 938 + }, + { + "epoch": 0.09180680484943293, + "grad_norm": 10.155288754800821, + "learning_rate": 5e-05, + "loss": 0.0918, + "num_input_tokens_seen": 91078580, + "step": 939 + }, + { + "epoch": 0.09180680484943293, + "loss": 0.1053093746304512, + "loss_ce": 0.0013054704759269953, + "loss_iou": 0.48046875, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 91078580, + "step": 939 + }, + { + "epoch": 0.0919045756746187, + "grad_norm": 13.745837726793805, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 91175708, + "step": 940 + }, + { + "epoch": 0.0919045756746187, + "loss": 0.11647948622703552, + "loss_ce": 0.009301758371293545, + "loss_iou": 0.482421875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 91175708, + "step": 940 + }, + { + "epoch": 0.09200234649980446, + "grad_norm": 13.63704386658312, + "learning_rate": 5e-05, + "loss": 0.1182, + "num_input_tokens_seen": 91272748, + "step": 941 + }, + { + "epoch": 0.09200234649980446, + "loss": 0.12959884107112885, + "loss_ce": 0.010977008379995823, + "loss_iou": 0.33203125, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 91272748, + "step": 941 + }, + { + "epoch": 0.09210011732499022, + "grad_norm": 14.42101223343772, + "learning_rate": 5e-05, + "loss": 0.1235, + "num_input_tokens_seen": 91369908, + "step": 942 + }, + { + "epoch": 0.09210011732499022, + "loss": 0.11361634731292725, + "loss_ce": 0.005065326578915119, + "loss_iou": 0.376953125, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 91369908, + "step": 942 + }, + { + "epoch": 0.09219788815017599, + "grad_norm": 10.06742931769503, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 91467100, + "step": 943 + }, + { + "epoch": 0.09219788815017599, + "loss": 0.09037785232067108, + "loss_ce": 0.0044556064531207085, + "loss_iou": 0.4765625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 91467100, + "step": 943 + }, + { + "epoch": 0.09229565897536175, + "grad_norm": 5.589678862327773, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 91564112, + "step": 944 + }, + { + "epoch": 0.09229565897536175, + "loss": 0.09379100054502487, + "loss_ce": 0.007464396767318249, + "loss_iou": 0.42578125, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 91564112, + "step": 944 + }, + { + "epoch": 0.09239342980054752, + "grad_norm": 4.338543849767432, + "learning_rate": 5e-05, + "loss": 0.1075, + "num_input_tokens_seen": 91661272, + "step": 945 + }, + { + "epoch": 0.09239342980054752, + "loss": 0.09590204060077667, + "loss_ce": 0.004211978521198034, + "loss_iou": 0.3203125, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 91661272, + "step": 945 + }, + { + "epoch": 0.09249120062573328, + "grad_norm": 5.138244797481919, + "learning_rate": 5e-05, + "loss": 0.1179, + "num_input_tokens_seen": 91758424, + "step": 946 + }, + { + "epoch": 0.09249120062573328, + "loss": 0.11152123659849167, + "loss_ce": 0.007395259104669094, + "loss_iou": 0.490234375, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 91758424, + "step": 946 + }, + { + "epoch": 0.09258897145091904, + "grad_norm": 13.928322986737742, + "learning_rate": 5e-05, + "loss": 0.1155, + "num_input_tokens_seen": 91855912, + "step": 947 + }, + { + "epoch": 0.09258897145091904, + "loss": 0.1357365846633911, + "loss_ce": 0.005411266814917326, + "loss_iou": 0.48046875, + "loss_num": 0.026123046875, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 91855912, + "step": 947 + }, + { + "epoch": 0.09268674227610481, + "grad_norm": 21.172903244398718, + "learning_rate": 5e-05, + "loss": 0.1337, + "num_input_tokens_seen": 91953536, + "step": 948 + }, + { + "epoch": 0.09268674227610481, + "loss": 0.1278298795223236, + "loss_ce": 0.010337209329009056, + "loss_iou": 0.38671875, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 91953536, + "step": 948 + }, + { + "epoch": 0.09278451310129057, + "grad_norm": 17.548715335193474, + "learning_rate": 5e-05, + "loss": 0.1191, + "num_input_tokens_seen": 92050540, + "step": 949 + }, + { + "epoch": 0.09278451310129057, + "loss": 0.10709355771541595, + "loss_ce": 0.008369196206331253, + "loss_iou": 0.318359375, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 92050540, + "step": 949 + }, + { + "epoch": 0.09288228392647634, + "grad_norm": 16.499625900656092, + "learning_rate": 5e-05, + "loss": 0.1241, + "num_input_tokens_seen": 92147972, + "step": 950 + }, + { + "epoch": 0.09288228392647634, + "loss": 0.14787504076957703, + "loss_ce": 0.009538855403661728, + "loss_iou": 0.4453125, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 92147972, + "step": 950 + }, + { + "epoch": 0.0929800547516621, + "grad_norm": 8.242522419615886, + "learning_rate": 5e-05, + "loss": 0.1153, + "num_input_tokens_seen": 92244640, + "step": 951 + }, + { + "epoch": 0.0929800547516621, + "loss": 0.14529120922088623, + "loss_ce": 0.00726020522415638, + "loss_iou": 0.3828125, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 92244640, + "step": 951 + }, + { + "epoch": 0.09307782557684786, + "grad_norm": 9.106245511530386, + "learning_rate": 5e-05, + "loss": 0.1021, + "num_input_tokens_seen": 92342380, + "step": 952 + }, + { + "epoch": 0.09307782557684786, + "loss": 0.09976451843976974, + "loss_ce": 0.0038782870396971703, + "loss_iou": 0.37890625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 92342380, + "step": 952 + }, + { + "epoch": 0.09317559640203363, + "grad_norm": 23.357747143474572, + "learning_rate": 5e-05, + "loss": 0.1431, + "num_input_tokens_seen": 92439144, + "step": 953 + }, + { + "epoch": 0.09317559640203363, + "loss": 0.1431109607219696, + "loss_ce": 0.005934449844062328, + "loss_iou": 0.34765625, + "loss_num": 0.0274658203125, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 92439144, + "step": 953 + }, + { + "epoch": 0.09327336722721939, + "grad_norm": 11.189421053394224, + "learning_rate": 5e-05, + "loss": 0.1272, + "num_input_tokens_seen": 92535628, + "step": 954 + }, + { + "epoch": 0.09327336722721939, + "loss": 0.08069826662540436, + "loss_ce": 0.003183616790920496, + "loss_iou": 0.384765625, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 92535628, + "step": 954 + }, + { + "epoch": 0.09337113805240516, + "grad_norm": 11.060273070892242, + "learning_rate": 5e-05, + "loss": 0.1371, + "num_input_tokens_seen": 92633004, + "step": 955 + }, + { + "epoch": 0.09337113805240516, + "loss": 0.10514390468597412, + "loss_ce": 0.002391224727034569, + "loss_iou": 0.515625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 92633004, + "step": 955 + }, + { + "epoch": 0.09346890887759093, + "grad_norm": 14.153249105603543, + "learning_rate": 5e-05, + "loss": 0.1691, + "num_input_tokens_seen": 92731044, + "step": 956 + }, + { + "epoch": 0.09346890887759093, + "loss": 0.11517596244812012, + "loss_ce": 0.004473449662327766, + "loss_iou": 0.43359375, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 92731044, + "step": 956 + }, + { + "epoch": 0.0935666797027767, + "grad_norm": 8.085015424056335, + "learning_rate": 5e-05, + "loss": 0.121, + "num_input_tokens_seen": 92828236, + "step": 957 + }, + { + "epoch": 0.0935666797027767, + "loss": 0.12779530882835388, + "loss_ce": 0.004748442210257053, + "loss_iou": 0.423828125, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 92828236, + "step": 957 + }, + { + "epoch": 0.09366445052796246, + "grad_norm": 4.221113793961353, + "learning_rate": 5e-05, + "loss": 0.1299, + "num_input_tokens_seen": 92925732, + "step": 958 + }, + { + "epoch": 0.09366445052796246, + "loss": 0.11602473258972168, + "loss_ce": 0.008114569820463657, + "loss_iou": 0.392578125, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 92925732, + "step": 958 + }, + { + "epoch": 0.09376222135314823, + "grad_norm": 25.711361186816294, + "learning_rate": 5e-05, + "loss": 0.1322, + "num_input_tokens_seen": 93022460, + "step": 959 + }, + { + "epoch": 0.09376222135314823, + "loss": 0.11572910845279694, + "loss_ce": 0.00452304445207119, + "loss_iou": 0.578125, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 93022460, + "step": 959 + }, + { + "epoch": 0.09385999217833399, + "grad_norm": 21.70546676118806, + "learning_rate": 5e-05, + "loss": 0.1513, + "num_input_tokens_seen": 93118716, + "step": 960 + }, + { + "epoch": 0.09385999217833399, + "loss": 0.20944206416606903, + "loss_ce": 0.00942985713481903, + "loss_iou": 0.365234375, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 93118716, + "step": 960 + }, + { + "epoch": 0.09395776300351975, + "grad_norm": 3.6216242404661347, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 93215252, + "step": 961 + }, + { + "epoch": 0.09395776300351975, + "loss": 0.07153819501399994, + "loss_ce": 0.0033161547034978867, + "loss_iou": 0.40625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 93215252, + "step": 961 + }, + { + "epoch": 0.09405553382870552, + "grad_norm": 5.734765784374707, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 93310832, + "step": 962 + }, + { + "epoch": 0.09405553382870552, + "loss": 0.11135105043649673, + "loss_ce": 0.005683936178684235, + "loss_iou": 0.353515625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 93310832, + "step": 962 + }, + { + "epoch": 0.09415330465389128, + "grad_norm": 2.239227493714452, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 93407696, + "step": 963 + }, + { + "epoch": 0.09415330465389128, + "loss": 0.07419973611831665, + "loss_ce": 0.0030174790881574154, + "loss_iou": 0.435546875, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 93407696, + "step": 963 + }, + { + "epoch": 0.09425107547907705, + "grad_norm": 12.858362324017557, + "learning_rate": 5e-05, + "loss": 0.1156, + "num_input_tokens_seen": 93505684, + "step": 964 + }, + { + "epoch": 0.09425107547907705, + "loss": 0.11322619765996933, + "loss_ce": 0.004827759228646755, + "loss_iou": 0.388671875, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 93505684, + "step": 964 + }, + { + "epoch": 0.09434884630426281, + "grad_norm": 26.305735324610325, + "learning_rate": 5e-05, + "loss": 0.161, + "num_input_tokens_seen": 93601780, + "step": 965 + }, + { + "epoch": 0.09434884630426281, + "loss": 0.15342356264591217, + "loss_ce": 0.006786600686609745, + "loss_iou": 0.419921875, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 93601780, + "step": 965 + }, + { + "epoch": 0.09444661712944857, + "grad_norm": 11.274111522901707, + "learning_rate": 5e-05, + "loss": 0.1219, + "num_input_tokens_seen": 93698524, + "step": 966 + }, + { + "epoch": 0.09444661712944857, + "loss": 0.11394625157117844, + "loss_ce": 0.008813198655843735, + "loss_iou": 0.328125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 93698524, + "step": 966 + }, + { + "epoch": 0.09454438795463434, + "grad_norm": 3.546553008770497, + "learning_rate": 5e-05, + "loss": 0.1324, + "num_input_tokens_seen": 93795756, + "step": 967 + }, + { + "epoch": 0.09454438795463434, + "loss": 0.10964684933423996, + "loss_ce": 0.0064974334090948105, + "loss_iou": 0.3984375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 93795756, + "step": 967 + }, + { + "epoch": 0.0946421587798201, + "grad_norm": 8.87143094221143, + "learning_rate": 5e-05, + "loss": 0.1326, + "num_input_tokens_seen": 93891896, + "step": 968 + }, + { + "epoch": 0.0946421587798201, + "loss": 0.14306899905204773, + "loss_ce": 0.008089750073850155, + "loss_iou": 0.28125, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 93891896, + "step": 968 + }, + { + "epoch": 0.09473992960500587, + "grad_norm": 4.814545121957811, + "learning_rate": 5e-05, + "loss": 0.1029, + "num_input_tokens_seen": 93989172, + "step": 969 + }, + { + "epoch": 0.09473992960500587, + "loss": 0.10083796083927155, + "loss_ce": 0.004341382533311844, + "loss_iou": 0.376953125, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 93989172, + "step": 969 + }, + { + "epoch": 0.09483770043019163, + "grad_norm": 11.329754702940829, + "learning_rate": 5e-05, + "loss": 0.1227, + "num_input_tokens_seen": 94085880, + "step": 970 + }, + { + "epoch": 0.09483770043019163, + "loss": 0.09303028881549835, + "loss_ce": 0.005200703162699938, + "loss_iou": 0.384765625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 94085880, + "step": 970 + }, + { + "epoch": 0.0949354712553774, + "grad_norm": 21.2257746879624, + "learning_rate": 5e-05, + "loss": 0.1141, + "num_input_tokens_seen": 94182860, + "step": 971 + }, + { + "epoch": 0.0949354712553774, + "loss": 0.12489180266857147, + "loss_ce": 0.003767540445551276, + "loss_iou": 0.345703125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 94182860, + "step": 971 + }, + { + "epoch": 0.09503324208056316, + "grad_norm": 10.78448244327663, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 94279112, + "step": 972 + }, + { + "epoch": 0.09503324208056316, + "loss": 0.08417180180549622, + "loss_ce": 0.00479557691141963, + "loss_iou": 0.267578125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 94279112, + "step": 972 + }, + { + "epoch": 0.09513101290574892, + "grad_norm": 15.247387529879255, + "learning_rate": 5e-05, + "loss": 0.1005, + "num_input_tokens_seen": 94375484, + "step": 973 + }, + { + "epoch": 0.09513101290574892, + "loss": 0.13254864513874054, + "loss_ce": 0.008937194012105465, + "loss_iou": 0.275390625, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 94375484, + "step": 973 + }, + { + "epoch": 0.09522878373093469, + "grad_norm": 15.138068027200482, + "learning_rate": 5e-05, + "loss": 0.1415, + "num_input_tokens_seen": 94472152, + "step": 974 + }, + { + "epoch": 0.09522878373093469, + "loss": 0.16277644038200378, + "loss_ce": 0.0036730545107275248, + "loss_iou": 0.310546875, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 94472152, + "step": 974 + }, + { + "epoch": 0.09532655455612045, + "grad_norm": 3.1773305797169655, + "learning_rate": 5e-05, + "loss": 0.0806, + "num_input_tokens_seen": 94568524, + "step": 975 + }, + { + "epoch": 0.09532655455612045, + "loss": 0.05978653207421303, + "loss_ce": 0.004275057464838028, + "loss_iou": 0.296875, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 94568524, + "step": 975 + }, + { + "epoch": 0.09542432538130621, + "grad_norm": 5.3103840646435465, + "learning_rate": 5e-05, + "loss": 0.1008, + "num_input_tokens_seen": 94665484, + "step": 976 + }, + { + "epoch": 0.09542432538130621, + "loss": 0.09580951184034348, + "loss_ce": 0.00437885383144021, + "loss_iou": 0.326171875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 94665484, + "step": 976 + }, + { + "epoch": 0.09552209620649198, + "grad_norm": 5.751319507540127, + "learning_rate": 5e-05, + "loss": 0.101, + "num_input_tokens_seen": 94762636, + "step": 977 + }, + { + "epoch": 0.09552209620649198, + "loss": 0.1219668835401535, + "loss_ce": 0.00700716394931078, + "loss_iou": 0.36328125, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 94762636, + "step": 977 + }, + { + "epoch": 0.09561986703167774, + "grad_norm": 12.725914721273398, + "learning_rate": 5e-05, + "loss": 0.1548, + "num_input_tokens_seen": 94858672, + "step": 978 + }, + { + "epoch": 0.09561986703167774, + "loss": 0.17438861727714539, + "loss_ce": 0.006847105920314789, + "loss_iou": 0.51171875, + "loss_num": 0.033447265625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 94858672, + "step": 978 + }, + { + "epoch": 0.0957176378568635, + "grad_norm": 25.85983245772191, + "learning_rate": 5e-05, + "loss": 0.1111, + "num_input_tokens_seen": 94955352, + "step": 979 + }, + { + "epoch": 0.0957176378568635, + "loss": 0.13469433784484863, + "loss_ce": 0.006947761867195368, + "loss_iou": 0.392578125, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 94955352, + "step": 979 + }, + { + "epoch": 0.09581540868204928, + "grad_norm": 6.625287127648869, + "learning_rate": 5e-05, + "loss": 0.1149, + "num_input_tokens_seen": 95051864, + "step": 980 + }, + { + "epoch": 0.09581540868204928, + "loss": 0.14435438811779022, + "loss_ce": 0.007269419729709625, + "loss_iou": 0.451171875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 95051864, + "step": 980 + }, + { + "epoch": 0.09591317950723505, + "grad_norm": 5.727081478601465, + "learning_rate": 5e-05, + "loss": 0.1559, + "num_input_tokens_seen": 95148804, + "step": 981 + }, + { + "epoch": 0.09591317950723505, + "loss": 0.12969408929347992, + "loss_ce": 0.006998166441917419, + "loss_iou": 0.41796875, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 95148804, + "step": 981 + }, + { + "epoch": 0.09601095033242081, + "grad_norm": 8.110157644977468, + "learning_rate": 5e-05, + "loss": 0.1196, + "num_input_tokens_seen": 95245488, + "step": 982 + }, + { + "epoch": 0.09601095033242081, + "loss": 0.12113960087299347, + "loss_ce": 0.006881793029606342, + "loss_iou": 0.23828125, + "loss_num": 0.02294921875, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 95245488, + "step": 982 + }, + { + "epoch": 0.09610872115760657, + "grad_norm": 7.087523433344491, + "learning_rate": 5e-05, + "loss": 0.1, + "num_input_tokens_seen": 95343052, + "step": 983 + }, + { + "epoch": 0.09610872115760657, + "loss": 0.07190819084644318, + "loss_ce": 0.006676865741610527, + "loss_iou": 0.41015625, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 95343052, + "step": 983 + }, + { + "epoch": 0.09620649198279234, + "grad_norm": 14.700827854759508, + "learning_rate": 5e-05, + "loss": 0.1303, + "num_input_tokens_seen": 95439684, + "step": 984 + }, + { + "epoch": 0.09620649198279234, + "loss": 0.11629490554332733, + "loss_ce": 0.003059430979192257, + "loss_iou": 0.439453125, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 95439684, + "step": 984 + }, + { + "epoch": 0.0963042628079781, + "grad_norm": 28.843942531094054, + "learning_rate": 5e-05, + "loss": 0.122, + "num_input_tokens_seen": 95536444, + "step": 985 + }, + { + "epoch": 0.0963042628079781, + "loss": 0.10665399581193924, + "loss_ce": 0.005945984739810228, + "loss_iou": 0.4375, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 95536444, + "step": 985 + }, + { + "epoch": 0.09640203363316387, + "grad_norm": 5.215205442703876, + "learning_rate": 5e-05, + "loss": 0.1328, + "num_input_tokens_seen": 95634268, + "step": 986 + }, + { + "epoch": 0.09640203363316387, + "loss": 0.16886058449745178, + "loss_ce": 0.004309803247451782, + "loss_iou": 0.359375, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 95634268, + "step": 986 + }, + { + "epoch": 0.09649980445834963, + "grad_norm": 7.081046188255245, + "learning_rate": 5e-05, + "loss": 0.1199, + "num_input_tokens_seen": 95730952, + "step": 987 + }, + { + "epoch": 0.09649980445834963, + "loss": 0.07994970679283142, + "loss_ce": 0.005608892068266869, + "loss_iou": 0.37109375, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 95730952, + "step": 987 + }, + { + "epoch": 0.0965975752835354, + "grad_norm": 7.535242500375964, + "learning_rate": 5e-05, + "loss": 0.1363, + "num_input_tokens_seen": 95827516, + "step": 988 + }, + { + "epoch": 0.0965975752835354, + "loss": 0.17775453627109528, + "loss_ce": 0.005574367940425873, + "loss_iou": 0.408203125, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 95827516, + "step": 988 + }, + { + "epoch": 0.09669534610872116, + "grad_norm": 2.60331748688035, + "learning_rate": 5e-05, + "loss": 0.1259, + "num_input_tokens_seen": 95922744, + "step": 989 + }, + { + "epoch": 0.09669534610872116, + "loss": 0.12151409685611725, + "loss_ce": 0.008904234506189823, + "loss_iou": 0.326171875, + "loss_num": 0.0224609375, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 95922744, + "step": 989 + }, + { + "epoch": 0.09679311693390692, + "grad_norm": 6.471653811609133, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 96019172, + "step": 990 + }, + { + "epoch": 0.09679311693390692, + "loss": 0.0700400173664093, + "loss_ce": 0.007417947053909302, + "loss_iou": 0.349609375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 96019172, + "step": 990 + }, + { + "epoch": 0.09689088775909269, + "grad_norm": 6.966342222907298, + "learning_rate": 5e-05, + "loss": 0.0919, + "num_input_tokens_seen": 96115832, + "step": 991 + }, + { + "epoch": 0.09689088775909269, + "loss": 0.0851040780544281, + "loss_ce": 0.004598704166710377, + "loss_iou": 0.287109375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 96115832, + "step": 991 + }, + { + "epoch": 0.09698865858427845, + "grad_norm": 6.633658985953865, + "learning_rate": 5e-05, + "loss": 0.1118, + "num_input_tokens_seen": 96214064, + "step": 992 + }, + { + "epoch": 0.09698865858427845, + "loss": 0.09347172826528549, + "loss_ce": 0.007305344566702843, + "loss_iou": 0.35546875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 96214064, + "step": 992 + }, + { + "epoch": 0.09708642940946421, + "grad_norm": 3.083790198682573, + "learning_rate": 5e-05, + "loss": 0.1024, + "num_input_tokens_seen": 96311140, + "step": 993 + }, + { + "epoch": 0.09708642940946421, + "loss": 0.09552063792943954, + "loss_ce": 0.005020755343139172, + "loss_iou": 0.419921875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 96311140, + "step": 993 + }, + { + "epoch": 0.09718420023464998, + "grad_norm": 4.537670479742195, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 96408444, + "step": 994 + }, + { + "epoch": 0.09718420023464998, + "loss": 0.11210982501506805, + "loss_ce": 0.007083574775606394, + "loss_iou": 0.431640625, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 96408444, + "step": 994 + }, + { + "epoch": 0.09728197105983574, + "grad_norm": 3.668447029158282, + "learning_rate": 5e-05, + "loss": 0.1064, + "num_input_tokens_seen": 96505572, + "step": 995 + }, + { + "epoch": 0.09728197105983574, + "loss": 0.08519099652767181, + "loss_ce": 0.0038311288226395845, + "loss_iou": 0.3046875, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 96505572, + "step": 995 + }, + { + "epoch": 0.0973797418850215, + "grad_norm": 5.209198925005757, + "learning_rate": 5e-05, + "loss": 0.0921, + "num_input_tokens_seen": 96602136, + "step": 996 + }, + { + "epoch": 0.0973797418850215, + "loss": 0.08933582901954651, + "loss_ce": 0.00544110219925642, + "loss_iou": 0.3359375, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 96602136, + "step": 996 + }, + { + "epoch": 0.09747751271020727, + "grad_norm": 15.492625977745668, + "learning_rate": 5e-05, + "loss": 0.1345, + "num_input_tokens_seen": 96699188, + "step": 997 + }, + { + "epoch": 0.09747751271020727, + "loss": 0.15812453627586365, + "loss_ce": 0.004224400036036968, + "loss_iou": 0.388671875, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 96699188, + "step": 997 + }, + { + "epoch": 0.09757528353539303, + "grad_norm": 11.717280207017007, + "learning_rate": 5e-05, + "loss": 0.0995, + "num_input_tokens_seen": 96795840, + "step": 998 + }, + { + "epoch": 0.09757528353539303, + "loss": 0.10384734719991684, + "loss_ce": 0.003383479081094265, + "loss_iou": 0.396484375, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 96795840, + "step": 998 + }, + { + "epoch": 0.0976730543605788, + "grad_norm": 9.167685800288236, + "learning_rate": 5e-05, + "loss": 0.1179, + "num_input_tokens_seen": 96894276, + "step": 999 + }, + { + "epoch": 0.0976730543605788, + "loss": 0.11324401199817657, + "loss_ce": 0.006356201134622097, + "loss_iou": 0.439453125, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 96894276, + "step": 999 + }, + { + "epoch": 0.09777082518576456, + "grad_norm": 14.967273369206382, + "learning_rate": 5e-05, + "loss": 0.1582, + "num_input_tokens_seen": 96990976, + "step": 1000 + }, + { + "epoch": 0.09777082518576456, + "eval_seeclick_CIoU": 0.33694934844970703, + "eval_seeclick_GIoU": 0.3312452733516693, + "eval_seeclick_IoU": 0.41295652091503143, + "eval_seeclick_MAE_all": 0.11723460257053375, + "eval_seeclick_MAE_h": 0.07434692606329918, + "eval_seeclick_MAE_w": 0.1563291996717453, + "eval_seeclick_MAE_x": 0.17043612524867058, + "eval_seeclick_MAE_y": 0.06782613880932331, + "eval_seeclick_NUM_probability": 0.9998857975006104, + "eval_seeclick_inside_bbox": 0.5710227340459824, + "eval_seeclick_loss": 0.39207467436790466, + "eval_seeclick_loss_ce": 0.009942918084561825, + "eval_seeclick_loss_iou": 0.4786376953125, + "eval_seeclick_loss_num": 0.07061767578125, + "eval_seeclick_loss_xval": 0.3529052734375, + "eval_seeclick_runtime": 73.0922, + "eval_seeclick_samples_per_second": 0.588, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 96990976, + "step": 1000 + }, + { + "epoch": 0.09777082518576456, + "eval_icons_CIoU": 0.4934890866279602, + "eval_icons_GIoU": 0.49298061430454254, + "eval_icons_IoU": 0.5518409013748169, + "eval_icons_MAE_all": 0.10470304638147354, + "eval_icons_MAE_h": 0.10793198272585869, + "eval_icons_MAE_w": 0.10531671717762947, + "eval_icons_MAE_x": 0.09898035600781441, + "eval_icons_MAE_y": 0.1065831109881401, + "eval_icons_NUM_probability": 0.9997760057449341, + "eval_icons_inside_bbox": 0.7239583432674408, + "eval_icons_loss": 0.3010592460632324, + "eval_icons_loss_ce": 4.319826985010877e-05, + "eval_icons_loss_iou": 0.40142822265625, + "eval_icons_loss_num": 0.06208038330078125, + "eval_icons_loss_xval": 0.310150146484375, + "eval_icons_runtime": 90.0166, + "eval_icons_samples_per_second": 0.555, + "eval_icons_steps_per_second": 0.022, + "num_input_tokens_seen": 96990976, + "step": 1000 + }, + { + "epoch": 0.09777082518576456, + "eval_screenspot_CIoU": 0.28708069523175556, + "eval_screenspot_GIoU": 0.2748104731241862, + "eval_screenspot_IoU": 0.3797837992509206, + "eval_screenspot_MAE_all": 0.16597061852614084, + "eval_screenspot_MAE_h": 0.1313486546278, + "eval_screenspot_MAE_w": 0.2221652865409851, + "eval_screenspot_MAE_x": 0.1837755193312963, + "eval_screenspot_MAE_y": 0.12659302353858948, + "eval_screenspot_NUM_probability": 0.999842365582784, + "eval_screenspot_inside_bbox": 0.5899999936421713, + "eval_screenspot_loss": 0.5734342932701111, + "eval_screenspot_loss_ce": 0.013422979973256588, + "eval_screenspot_loss_iou": 0.3739420572916667, + "eval_screenspot_loss_num": 0.11216227213541667, + "eval_screenspot_loss_xval": 0.5607503255208334, + "eval_screenspot_runtime": 154.7152, + "eval_screenspot_samples_per_second": 0.575, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 96990976, + "step": 1000 + }, + { + "epoch": 0.09777082518576456, + "eval_compot_CIoU": 0.5211745202541351, + "eval_compot_GIoU": 0.501139298081398, + "eval_compot_IoU": 0.5657278895378113, + "eval_compot_MAE_all": 0.0812007486820221, + "eval_compot_MAE_h": 0.07155169174075127, + "eval_compot_MAE_w": 0.10384894907474518, + "eval_compot_MAE_x": 0.08590732514858246, + "eval_compot_MAE_y": 0.06349504552781582, + "eval_compot_NUM_probability": 0.9996005296707153, + "eval_compot_inside_bbox": 0.7673611044883728, + "eval_compot_loss": 0.34895965456962585, + "eval_compot_loss_ce": 0.0712319165468216, + "eval_compot_loss_iou": 0.59130859375, + "eval_compot_loss_num": 0.05278778076171875, + "eval_compot_loss_xval": 0.264007568359375, + "eval_compot_runtime": 87.948, + "eval_compot_samples_per_second": 0.569, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 96990976, + "step": 1000 + }, + { + "epoch": 0.09777082518576456, + "loss": 0.33731698989868164, + "loss_ce": 0.07871101796627045, + "loss_iou": 0.59765625, + "loss_num": 0.0517578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 96990976, + "step": 1000 + }, + { + "epoch": 0.09786859601095033, + "grad_norm": 8.253263813182706, + "learning_rate": 5e-05, + "loss": 0.1956, + "num_input_tokens_seen": 97087700, + "step": 1001 + }, + { + "epoch": 0.09786859601095033, + "loss": 0.22617793083190918, + "loss_ce": 0.00852655153721571, + "loss_iou": 0.28125, + "loss_num": 0.04345703125, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 97087700, + "step": 1001 + }, + { + "epoch": 0.09796636683613609, + "grad_norm": 4.180010508628714, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 97185176, + "step": 1002 + }, + { + "epoch": 0.09796636683613609, + "loss": 0.10410337150096893, + "loss_ce": 0.003425880102440715, + "loss_iou": 0.5390625, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 97185176, + "step": 1002 + }, + { + "epoch": 0.09806413766132187, + "grad_norm": 3.3097685568133666, + "learning_rate": 5e-05, + "loss": 0.1292, + "num_input_tokens_seen": 97281820, + "step": 1003 + }, + { + "epoch": 0.09806413766132187, + "loss": 0.14466466009616852, + "loss_ce": 0.0058707124553620815, + "loss_iou": 0.478515625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 97281820, + "step": 1003 + }, + { + "epoch": 0.09816190848650763, + "grad_norm": 6.208771363650852, + "learning_rate": 5e-05, + "loss": 0.0997, + "num_input_tokens_seen": 97379892, + "step": 1004 + }, + { + "epoch": 0.09816190848650763, + "loss": 0.1019798219203949, + "loss_ce": 0.006093594711273909, + "loss_iou": 0.37890625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 97379892, + "step": 1004 + }, + { + "epoch": 0.0982596793116934, + "grad_norm": 4.055877827415086, + "learning_rate": 5e-05, + "loss": 0.1199, + "num_input_tokens_seen": 97477316, + "step": 1005 + }, + { + "epoch": 0.0982596793116934, + "loss": 0.13502982258796692, + "loss_ce": 0.004780794959515333, + "loss_iou": 0.453125, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 97477316, + "step": 1005 + }, + { + "epoch": 0.09835745013687916, + "grad_norm": 8.027243344216371, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 97575428, + "step": 1006 + }, + { + "epoch": 0.09835745013687916, + "loss": 0.06321858614683151, + "loss_ce": 0.006974692922085524, + "loss_iou": 0.392578125, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 97575428, + "step": 1006 + }, + { + "epoch": 0.09845522096206492, + "grad_norm": 5.218721063184248, + "learning_rate": 5e-05, + "loss": 0.1279, + "num_input_tokens_seen": 97672208, + "step": 1007 + }, + { + "epoch": 0.09845522096206492, + "loss": 0.1482059806585312, + "loss_ce": 0.006016955710947514, + "loss_iou": 0.34375, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 97672208, + "step": 1007 + }, + { + "epoch": 0.09855299178725069, + "grad_norm": 17.742806500358615, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 97768864, + "step": 1008 + }, + { + "epoch": 0.09855299178725069, + "loss": 0.08382343500852585, + "loss_ce": 0.006766549777239561, + "loss_iou": 0.310546875, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 97768864, + "step": 1008 + }, + { + "epoch": 0.09865076261243645, + "grad_norm": 35.380857651516955, + "learning_rate": 5e-05, + "loss": 0.1171, + "num_input_tokens_seen": 97865444, + "step": 1009 + }, + { + "epoch": 0.09865076261243645, + "loss": 0.12538199126720428, + "loss_ce": 0.00355583056807518, + "loss_iou": 0.35546875, + "loss_num": 0.0242919921875, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 97865444, + "step": 1009 + }, + { + "epoch": 0.09874853343762222, + "grad_norm": 5.541855622483345, + "learning_rate": 5e-05, + "loss": 0.1301, + "num_input_tokens_seen": 97962060, + "step": 1010 + }, + { + "epoch": 0.09874853343762222, + "loss": 0.12343735992908478, + "loss_ce": 0.004052584990859032, + "loss_iou": 0.5078125, + "loss_num": 0.02392578125, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 97962060, + "step": 1010 + }, + { + "epoch": 0.09884630426280798, + "grad_norm": 8.739968252625543, + "learning_rate": 5e-05, + "loss": 0.1033, + "num_input_tokens_seen": 98059140, + "step": 1011 + }, + { + "epoch": 0.09884630426280798, + "loss": 0.10264454782009125, + "loss_ce": 0.0008989418856799603, + "loss_iou": 0.37109375, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 98059140, + "step": 1011 + }, + { + "epoch": 0.09894407508799374, + "grad_norm": 8.588604852041216, + "learning_rate": 5e-05, + "loss": 0.0998, + "num_input_tokens_seen": 98155616, + "step": 1012 + }, + { + "epoch": 0.09894407508799374, + "loss": 0.129496231675148, + "loss_ce": 0.01473487913608551, + "loss_iou": 0.322265625, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 98155616, + "step": 1012 + }, + { + "epoch": 0.09904184591317951, + "grad_norm": 5.469988186174033, + "learning_rate": 5e-05, + "loss": 0.1168, + "num_input_tokens_seen": 98252400, + "step": 1013 + }, + { + "epoch": 0.09904184591317951, + "loss": 0.08883707225322723, + "loss_ce": 0.016662994399666786, + "loss_iou": 0.51953125, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 98252400, + "step": 1013 + }, + { + "epoch": 0.09913961673836527, + "grad_norm": 10.367540916281516, + "learning_rate": 5e-05, + "loss": 0.1905, + "num_input_tokens_seen": 98348912, + "step": 1014 + }, + { + "epoch": 0.09913961673836527, + "loss": 0.15924952924251556, + "loss_ce": 0.007516125217080116, + "loss_iou": 0.55859375, + "loss_num": 0.0303955078125, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 98348912, + "step": 1014 + }, + { + "epoch": 0.09923738756355104, + "grad_norm": 5.576392446116985, + "learning_rate": 5e-05, + "loss": 0.0921, + "num_input_tokens_seen": 98445648, + "step": 1015 + }, + { + "epoch": 0.09923738756355104, + "loss": 0.1006188690662384, + "loss_ce": 0.005465063266456127, + "loss_iou": 0.3828125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 98445648, + "step": 1015 + }, + { + "epoch": 0.0993351583887368, + "grad_norm": 7.988991730219083, + "learning_rate": 5e-05, + "loss": 0.1337, + "num_input_tokens_seen": 98542824, + "step": 1016 + }, + { + "epoch": 0.0993351583887368, + "loss": 0.12897363305091858, + "loss_ce": 0.008917476050555706, + "loss_iou": 0.50390625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 98542824, + "step": 1016 + }, + { + "epoch": 0.09943292921392256, + "grad_norm": 3.2866463094151204, + "learning_rate": 5e-05, + "loss": 0.1592, + "num_input_tokens_seen": 98639812, + "step": 1017 + }, + { + "epoch": 0.09943292921392256, + "loss": 0.140435591340065, + "loss_ce": 0.0049375491216778755, + "loss_iou": 0.3984375, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 98639812, + "step": 1017 + }, + { + "epoch": 0.09953070003910833, + "grad_norm": 8.677412631436367, + "learning_rate": 5e-05, + "loss": 0.1195, + "num_input_tokens_seen": 98737004, + "step": 1018 + }, + { + "epoch": 0.09953070003910833, + "loss": 0.10892775654792786, + "loss_ce": 0.004069351591169834, + "loss_iou": 0.384765625, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 98737004, + "step": 1018 + }, + { + "epoch": 0.09962847086429409, + "grad_norm": 10.156317883266784, + "learning_rate": 5e-05, + "loss": 0.1563, + "num_input_tokens_seen": 98833980, + "step": 1019 + }, + { + "epoch": 0.09962847086429409, + "loss": 0.1343812644481659, + "loss_ce": 0.008709874004125595, + "loss_iou": 0.515625, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 98833980, + "step": 1019 + }, + { + "epoch": 0.09972624168947986, + "grad_norm": 10.336437568071435, + "learning_rate": 5e-05, + "loss": 0.1473, + "num_input_tokens_seen": 98930820, + "step": 1020 + }, + { + "epoch": 0.09972624168947986, + "loss": 0.14544416964054108, + "loss_ce": 0.007199541665613651, + "loss_iou": 0.416015625, + "loss_num": 0.027587890625, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 98930820, + "step": 1020 + }, + { + "epoch": 0.09982401251466562, + "grad_norm": 14.222560833485305, + "learning_rate": 5e-05, + "loss": 0.1589, + "num_input_tokens_seen": 99028796, + "step": 1021 + }, + { + "epoch": 0.09982401251466562, + "loss": 0.12499332427978516, + "loss_ce": 0.006218915805220604, + "loss_iou": 0.54296875, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 99028796, + "step": 1021 + }, + { + "epoch": 0.09992178333985138, + "grad_norm": 4.35082960849976, + "learning_rate": 5e-05, + "loss": 0.1494, + "num_input_tokens_seen": 99125504, + "step": 1022 + }, + { + "epoch": 0.09992178333985138, + "loss": 0.1685175895690918, + "loss_ce": 0.004638193175196648, + "loss_iou": 0.484375, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 99125504, + "step": 1022 + }, + { + "epoch": 0.10001955416503715, + "grad_norm": 3.2941480053689753, + "learning_rate": 5e-05, + "loss": 0.1143, + "num_input_tokens_seen": 99222120, + "step": 1023 + }, + { + "epoch": 0.10001955416503715, + "loss": 0.11239181458950043, + "loss_ce": 0.011012422852218151, + "loss_iou": 0.408203125, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 99222120, + "step": 1023 + }, + { + "epoch": 0.10011732499022291, + "grad_norm": 5.250141980604845, + "learning_rate": 5e-05, + "loss": 0.1155, + "num_input_tokens_seen": 99319464, + "step": 1024 + }, + { + "epoch": 0.10011732499022291, + "loss": 0.12155793607234955, + "loss_ce": 0.011419996619224548, + "loss_iou": 0.41796875, + "loss_num": 0.02197265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 99319464, + "step": 1024 + }, + { + "epoch": 0.10021509581540868, + "grad_norm": 5.402353384779041, + "learning_rate": 5e-05, + "loss": 0.1052, + "num_input_tokens_seen": 99416296, + "step": 1025 + }, + { + "epoch": 0.10021509581540868, + "loss": 0.11025013029575348, + "loss_ce": 0.003286018967628479, + "loss_iou": 0.431640625, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 99416296, + "step": 1025 + }, + { + "epoch": 0.10031286664059445, + "grad_norm": 4.616323494308997, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 99513396, + "step": 1026 + }, + { + "epoch": 0.10031286664059445, + "loss": 0.06885534524917603, + "loss_ce": 0.0035629873163998127, + "loss_iou": 0.365234375, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 99513396, + "step": 1026 + }, + { + "epoch": 0.10041063746578022, + "grad_norm": 17.405196356726876, + "learning_rate": 5e-05, + "loss": 0.1233, + "num_input_tokens_seen": 99609980, + "step": 1027 + }, + { + "epoch": 0.10041063746578022, + "loss": 0.12963251769542694, + "loss_ce": 0.004754586145281792, + "loss_iou": 0.43359375, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 99609980, + "step": 1027 + }, + { + "epoch": 0.10050840829096598, + "grad_norm": 17.954737960617383, + "learning_rate": 5e-05, + "loss": 0.1553, + "num_input_tokens_seen": 99707760, + "step": 1028 + }, + { + "epoch": 0.10050840829096598, + "loss": 0.08322605490684509, + "loss_ce": 0.008366442285478115, + "loss_iou": 0.40234375, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 99707760, + "step": 1028 + }, + { + "epoch": 0.10060617911615174, + "grad_norm": 5.3067728846319735, + "learning_rate": 5e-05, + "loss": 0.0959, + "num_input_tokens_seen": 99805152, + "step": 1029 + }, + { + "epoch": 0.10060617911615174, + "loss": 0.10074086487293243, + "loss_ce": 0.005922751501202583, + "loss_iou": 0.458984375, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 99805152, + "step": 1029 + }, + { + "epoch": 0.10070394994133751, + "grad_norm": 5.029090306194696, + "learning_rate": 5e-05, + "loss": 0.1271, + "num_input_tokens_seen": 99902020, + "step": 1030 + }, + { + "epoch": 0.10070394994133751, + "loss": 0.10198597609996796, + "loss_ce": 0.00996022205799818, + "loss_iou": 0.2216796875, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 99902020, + "step": 1030 + }, + { + "epoch": 0.10080172076652327, + "grad_norm": 15.59262832394577, + "learning_rate": 5e-05, + "loss": 0.1274, + "num_input_tokens_seen": 99998528, + "step": 1031 + }, + { + "epoch": 0.10080172076652327, + "loss": 0.09417029470205307, + "loss_ce": 0.005943973083049059, + "loss_iou": 0.380859375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 99998528, + "step": 1031 + }, + { + "epoch": 0.10089949159170904, + "grad_norm": 12.095876324579347, + "learning_rate": 5e-05, + "loss": 0.0995, + "num_input_tokens_seen": 100094916, + "step": 1032 + }, + { + "epoch": 0.10089949159170904, + "loss": 0.10661402344703674, + "loss_ce": 0.004593757446855307, + "loss_iou": 0.421875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 100094916, + "step": 1032 + }, + { + "epoch": 0.1009972624168948, + "grad_norm": 17.78796600069279, + "learning_rate": 5e-05, + "loss": 0.1237, + "num_input_tokens_seen": 100191292, + "step": 1033 + }, + { + "epoch": 0.1009972624168948, + "loss": 0.10480007529258728, + "loss_ce": 0.0017879914958029985, + "loss_iou": 0.333984375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 100191292, + "step": 1033 + }, + { + "epoch": 0.10109503324208056, + "grad_norm": 15.633933331714998, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 100289084, + "step": 1034 + }, + { + "epoch": 0.10109503324208056, + "loss": 0.07800716161727905, + "loss_ce": 0.003071252256631851, + "loss_iou": 0.447265625, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 100289084, + "step": 1034 + }, + { + "epoch": 0.10119280406726633, + "grad_norm": 52.232563111789965, + "learning_rate": 5e-05, + "loss": 0.1153, + "num_input_tokens_seen": 100387128, + "step": 1035 + }, + { + "epoch": 0.10119280406726633, + "loss": 0.1310906559228897, + "loss_ce": 0.011065018363296986, + "loss_iou": 0.453125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 100387128, + "step": 1035 + }, + { + "epoch": 0.10129057489245209, + "grad_norm": 7.820317143749673, + "learning_rate": 5e-05, + "loss": 0.1624, + "num_input_tokens_seen": 100484504, + "step": 1036 + }, + { + "epoch": 0.10129057489245209, + "loss": 0.12757988274097443, + "loss_ce": 0.015397260896861553, + "loss_iou": 0.376953125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 100484504, + "step": 1036 + }, + { + "epoch": 0.10138834571763786, + "grad_norm": 12.998924171738535, + "learning_rate": 5e-05, + "loss": 0.1578, + "num_input_tokens_seen": 100582648, + "step": 1037 + }, + { + "epoch": 0.10138834571763786, + "loss": 0.16579818725585938, + "loss_ce": 0.003200515639036894, + "loss_iou": 0.578125, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 100582648, + "step": 1037 + }, + { + "epoch": 0.10148611654282362, + "grad_norm": 5.370540852571059, + "learning_rate": 5e-05, + "loss": 0.1165, + "num_input_tokens_seen": 100679568, + "step": 1038 + }, + { + "epoch": 0.10148611654282362, + "loss": 0.1411036252975464, + "loss_ce": 0.003816495416685939, + "loss_iou": 0.39453125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 100679568, + "step": 1038 + }, + { + "epoch": 0.10158388736800938, + "grad_norm": 5.6116634456378085, + "learning_rate": 5e-05, + "loss": 0.1255, + "num_input_tokens_seen": 100777860, + "step": 1039 + }, + { + "epoch": 0.10158388736800938, + "loss": 0.10639692842960358, + "loss_ce": 0.0052921948954463005, + "loss_iou": 0.451171875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 100777860, + "step": 1039 + }, + { + "epoch": 0.10168165819319515, + "grad_norm": 5.335864160695211, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 100876120, + "step": 1040 + }, + { + "epoch": 0.10168165819319515, + "loss": 0.08891192078590393, + "loss_ce": 0.008513355627655983, + "loss_iou": 0.458984375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 100876120, + "step": 1040 + }, + { + "epoch": 0.10177942901838091, + "grad_norm": 12.053790884714925, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 100973228, + "step": 1041 + }, + { + "epoch": 0.10177942901838091, + "loss": 0.09510014951229095, + "loss_ce": 0.002845511771738529, + "loss_iou": 0.49609375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 100973228, + "step": 1041 + }, + { + "epoch": 0.10187719984356668, + "grad_norm": 6.228959923893588, + "learning_rate": 5e-05, + "loss": 0.1251, + "num_input_tokens_seen": 101070112, + "step": 1042 + }, + { + "epoch": 0.10187719984356668, + "loss": 0.13237419724464417, + "loss_ce": 0.0028575905598700047, + "loss_iou": 0.53125, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 101070112, + "step": 1042 + }, + { + "epoch": 0.10197497066875244, + "grad_norm": 6.5555943667734455, + "learning_rate": 5e-05, + "loss": 0.1337, + "num_input_tokens_seen": 101166844, + "step": 1043 + }, + { + "epoch": 0.10197497066875244, + "loss": 0.10460414737462997, + "loss_ce": 0.005269424524158239, + "loss_iou": 0.50390625, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 101166844, + "step": 1043 + }, + { + "epoch": 0.1020727414939382, + "grad_norm": 15.534768423236498, + "learning_rate": 5e-05, + "loss": 0.1228, + "num_input_tokens_seen": 101263628, + "step": 1044 + }, + { + "epoch": 0.1020727414939382, + "loss": 0.08899348974227905, + "loss_ce": 0.00665706442669034, + "loss_iou": 0.31640625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 101263628, + "step": 1044 + }, + { + "epoch": 0.10217051231912397, + "grad_norm": 28.444042862451106, + "learning_rate": 5e-05, + "loss": 0.1234, + "num_input_tokens_seen": 101361060, + "step": 1045 + }, + { + "epoch": 0.10217051231912397, + "loss": 0.12060189247131348, + "loss_ce": 0.00634408462792635, + "loss_iou": 0.51171875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 101361060, + "step": 1045 + }, + { + "epoch": 0.10226828314430973, + "grad_norm": 12.462848485364175, + "learning_rate": 5e-05, + "loss": 0.1102, + "num_input_tokens_seen": 101458028, + "step": 1046 + }, + { + "epoch": 0.10226828314430973, + "loss": 0.12537246942520142, + "loss_ce": 0.00797134917229414, + "loss_iou": 0.48046875, + "loss_num": 0.0235595703125, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 101458028, + "step": 1046 + }, + { + "epoch": 0.1023660539694955, + "grad_norm": 5.079125606895069, + "learning_rate": 5e-05, + "loss": 0.1475, + "num_input_tokens_seen": 101555096, + "step": 1047 + }, + { + "epoch": 0.1023660539694955, + "loss": 0.14316114783287048, + "loss_ce": 0.004214617423713207, + "loss_iou": 0.4921875, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 101555096, + "step": 1047 + }, + { + "epoch": 0.10246382479468126, + "grad_norm": 6.070745046388251, + "learning_rate": 5e-05, + "loss": 0.1136, + "num_input_tokens_seen": 101652800, + "step": 1048 + }, + { + "epoch": 0.10246382479468126, + "loss": 0.0941939726471901, + "loss_ce": 0.0047774724662303925, + "loss_iou": 0.427734375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 101652800, + "step": 1048 + }, + { + "epoch": 0.10256159561986704, + "grad_norm": 4.580088768476591, + "learning_rate": 5e-05, + "loss": 0.1098, + "num_input_tokens_seen": 101749952, + "step": 1049 + }, + { + "epoch": 0.10256159561986704, + "loss": 0.13260746002197266, + "loss_ce": 0.007699017412960529, + "loss_iou": 0.330078125, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 101749952, + "step": 1049 + }, + { + "epoch": 0.1026593664450528, + "grad_norm": 11.580260563855386, + "learning_rate": 5e-05, + "loss": 0.1255, + "num_input_tokens_seen": 101846832, + "step": 1050 + }, + { + "epoch": 0.1026593664450528, + "loss": 0.11701436340808868, + "loss_ce": 0.008570145815610886, + "loss_iou": 0.451171875, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 101846832, + "step": 1050 + }, + { + "epoch": 0.10275713727023857, + "grad_norm": 12.49890204307791, + "learning_rate": 5e-05, + "loss": 0.1056, + "num_input_tokens_seen": 101943428, + "step": 1051 + }, + { + "epoch": 0.10275713727023857, + "loss": 0.06450258195400238, + "loss_ce": 0.008300649002194405, + "loss_iou": 0.23828125, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 101943428, + "step": 1051 + }, + { + "epoch": 0.10285490809542433, + "grad_norm": 8.057925450460612, + "learning_rate": 5e-05, + "loss": 0.1529, + "num_input_tokens_seen": 102040920, + "step": 1052 + }, + { + "epoch": 0.10285490809542433, + "loss": 0.15244176983833313, + "loss_ce": 0.009131226688623428, + "loss_iou": 0.443359375, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 102040920, + "step": 1052 + }, + { + "epoch": 0.1029526789206101, + "grad_norm": 2.882214196955776, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 102137936, + "step": 1053 + }, + { + "epoch": 0.1029526789206101, + "loss": 0.0854535698890686, + "loss_ce": 0.0073895989917218685, + "loss_iou": 0.4140625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 102137936, + "step": 1053 + }, + { + "epoch": 0.10305044974579586, + "grad_norm": 16.61060805074234, + "learning_rate": 5e-05, + "loss": 0.1247, + "num_input_tokens_seen": 102234988, + "step": 1054 + }, + { + "epoch": 0.10305044974579586, + "loss": 0.11079896986484528, + "loss_ce": 0.004902976565063, + "loss_iou": 0.4140625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 102234988, + "step": 1054 + }, + { + "epoch": 0.10314822057098162, + "grad_norm": 18.236581753192347, + "learning_rate": 5e-05, + "loss": 0.0979, + "num_input_tokens_seen": 102331744, + "step": 1055 + }, + { + "epoch": 0.10314822057098162, + "loss": 0.10984062403440475, + "loss_ce": 0.009895558468997478, + "loss_iou": 0.29296875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 102331744, + "step": 1055 + }, + { + "epoch": 0.10324599139616739, + "grad_norm": 12.284679650867867, + "learning_rate": 5e-05, + "loss": 0.1427, + "num_input_tokens_seen": 102428564, + "step": 1056 + }, + { + "epoch": 0.10324599139616739, + "loss": 0.14597547054290771, + "loss_ce": 0.003992436453700066, + "loss_iou": 0.453125, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 102428564, + "step": 1056 + }, + { + "epoch": 0.10334376222135315, + "grad_norm": 12.853336681313706, + "learning_rate": 5e-05, + "loss": 0.125, + "num_input_tokens_seen": 102525584, + "step": 1057 + }, + { + "epoch": 0.10334376222135315, + "loss": 0.09097166359424591, + "loss_ce": 0.002531728008762002, + "loss_iou": 0.361328125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 102525584, + "step": 1057 + }, + { + "epoch": 0.10344153304653891, + "grad_norm": 8.087947170554454, + "learning_rate": 5e-05, + "loss": 0.1056, + "num_input_tokens_seen": 102623420, + "step": 1058 + }, + { + "epoch": 0.10344153304653891, + "loss": 0.10144411772489548, + "loss_ce": 0.0032538045197725296, + "loss_iou": 0.46875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 102623420, + "step": 1058 + }, + { + "epoch": 0.10353930387172468, + "grad_norm": 12.30694507927958, + "learning_rate": 5e-05, + "loss": 0.1168, + "num_input_tokens_seen": 102720764, + "step": 1059 + }, + { + "epoch": 0.10353930387172468, + "loss": 0.11004337668418884, + "loss_ce": 0.0032318569719791412, + "loss_iou": 0.392578125, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 102720764, + "step": 1059 + }, + { + "epoch": 0.10363707469691044, + "grad_norm": 7.968510315889658, + "learning_rate": 5e-05, + "loss": 0.1092, + "num_input_tokens_seen": 102816588, + "step": 1060 + }, + { + "epoch": 0.10363707469691044, + "loss": 0.10065793991088867, + "loss_ce": 0.00524473562836647, + "loss_iou": 0.357421875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 102816588, + "step": 1060 + }, + { + "epoch": 0.1037348455220962, + "grad_norm": 4.856171776313694, + "learning_rate": 5e-05, + "loss": 0.0806, + "num_input_tokens_seen": 102912752, + "step": 1061 + }, + { + "epoch": 0.1037348455220962, + "loss": 0.0789998322725296, + "loss_ce": 0.003041577525436878, + "loss_iou": 0.326171875, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 102912752, + "step": 1061 + }, + { + "epoch": 0.10383261634728197, + "grad_norm": 21.45506688713066, + "learning_rate": 5e-05, + "loss": 0.1368, + "num_input_tokens_seen": 103009552, + "step": 1062 + }, + { + "epoch": 0.10383261634728197, + "loss": 0.17225942015647888, + "loss_ce": 0.009356589987874031, + "loss_iou": 0.43359375, + "loss_num": 0.032470703125, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 103009552, + "step": 1062 + }, + { + "epoch": 0.10393038717246773, + "grad_norm": 24.78783825456864, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 103107248, + "step": 1063 + }, + { + "epoch": 0.10393038717246773, + "loss": 0.09019933640956879, + "loss_ce": 0.003163198009133339, + "loss_iou": 0.458984375, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 103107248, + "step": 1063 + }, + { + "epoch": 0.1040281579976535, + "grad_norm": 5.371443916948244, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 103204060, + "step": 1064 + }, + { + "epoch": 0.1040281579976535, + "loss": 0.08148172497749329, + "loss_ce": 0.0070531656965613365, + "loss_iou": 0.419921875, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 103204060, + "step": 1064 + }, + { + "epoch": 0.10412592882283926, + "grad_norm": 6.899303681032357, + "learning_rate": 5e-05, + "loss": 0.1267, + "num_input_tokens_seen": 103301516, + "step": 1065 + }, + { + "epoch": 0.10412592882283926, + "loss": 0.09867256134748459, + "loss_ce": 0.0029084011912345886, + "loss_iou": 0.396484375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 103301516, + "step": 1065 + }, + { + "epoch": 0.10422369964802503, + "grad_norm": 7.767771644967818, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 103397804, + "step": 1066 + }, + { + "epoch": 0.10422369964802503, + "loss": 0.0874541699886322, + "loss_ce": 0.0141204334795475, + "loss_iou": 0.369140625, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 103397804, + "step": 1066 + }, + { + "epoch": 0.10432147047321079, + "grad_norm": 38.87721303708896, + "learning_rate": 5e-05, + "loss": 0.107, + "num_input_tokens_seen": 103494104, + "step": 1067 + }, + { + "epoch": 0.10432147047321079, + "loss": 0.09779786318540573, + "loss_ce": 0.0056195128709077835, + "loss_iou": 0.2109375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 103494104, + "step": 1067 + }, + { + "epoch": 0.10441924129839655, + "grad_norm": 7.077443066747382, + "learning_rate": 5e-05, + "loss": 0.1665, + "num_input_tokens_seen": 103591040, + "step": 1068 + }, + { + "epoch": 0.10441924129839655, + "loss": 0.24576614797115326, + "loss_ce": 0.007118675857782364, + "loss_iou": 0.435546875, + "loss_num": 0.0478515625, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 103591040, + "step": 1068 + }, + { + "epoch": 0.10451701212358232, + "grad_norm": 6.2107499547087475, + "learning_rate": 5e-05, + "loss": 0.1279, + "num_input_tokens_seen": 103689384, + "step": 1069 + }, + { + "epoch": 0.10451701212358232, + "loss": 0.13118094205856323, + "loss_ce": 0.00959891639649868, + "loss_iou": 0.306640625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 103689384, + "step": 1069 + }, + { + "epoch": 0.10461478294876808, + "grad_norm": 4.815218029228695, + "learning_rate": 5e-05, + "loss": 0.1054, + "num_input_tokens_seen": 103786080, + "step": 1070 + }, + { + "epoch": 0.10461478294876808, + "loss": 0.08329334855079651, + "loss_ce": 0.007911117747426033, + "loss_iou": 0.453125, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 103786080, + "step": 1070 + }, + { + "epoch": 0.10471255377395385, + "grad_norm": 10.339772322323785, + "learning_rate": 5e-05, + "loss": 0.1287, + "num_input_tokens_seen": 103883452, + "step": 1071 + }, + { + "epoch": 0.10471255377395385, + "loss": 0.11421142518520355, + "loss_ce": 0.008208618499338627, + "loss_iou": 0.26171875, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 103883452, + "step": 1071 + }, + { + "epoch": 0.10481032459913962, + "grad_norm": 8.252065869597178, + "learning_rate": 5e-05, + "loss": 0.0929, + "num_input_tokens_seen": 103980196, + "step": 1072 + }, + { + "epoch": 0.10481032459913962, + "loss": 0.10442652553319931, + "loss_ce": 0.0071059707552194595, + "loss_iou": 0.515625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 103980196, + "step": 1072 + }, + { + "epoch": 0.10490809542432539, + "grad_norm": 10.261173298832846, + "learning_rate": 5e-05, + "loss": 0.1192, + "num_input_tokens_seen": 104077148, + "step": 1073 + }, + { + "epoch": 0.10490809542432539, + "loss": 0.09572798013687134, + "loss_ce": 0.0033207498490810394, + "loss_iou": 0.31640625, + "loss_num": 0.0185546875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 104077148, + "step": 1073 + }, + { + "epoch": 0.10500586624951115, + "grad_norm": 11.224295970953749, + "learning_rate": 5e-05, + "loss": 0.127, + "num_input_tokens_seen": 104173944, + "step": 1074 + }, + { + "epoch": 0.10500586624951115, + "loss": 0.11573780328035355, + "loss_ce": 0.008407477289438248, + "loss_iou": 0.345703125, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 104173944, + "step": 1074 + }, + { + "epoch": 0.10510363707469692, + "grad_norm": 8.583732407734983, + "learning_rate": 5e-05, + "loss": 0.1698, + "num_input_tokens_seen": 104271084, + "step": 1075 + }, + { + "epoch": 0.10510363707469692, + "loss": 0.19884760677814484, + "loss_ce": 0.008051720447838306, + "loss_iou": 0.36328125, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 104271084, + "step": 1075 + }, + { + "epoch": 0.10520140789988268, + "grad_norm": 11.173517708303475, + "learning_rate": 5e-05, + "loss": 0.1018, + "num_input_tokens_seen": 104368556, + "step": 1076 + }, + { + "epoch": 0.10520140789988268, + "loss": 0.11247138679027557, + "loss_ce": 0.007429882418364286, + "loss_iou": 0.4609375, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 104368556, + "step": 1076 + }, + { + "epoch": 0.10529917872506844, + "grad_norm": 20.24414596637834, + "learning_rate": 5e-05, + "loss": 0.1031, + "num_input_tokens_seen": 104465092, + "step": 1077 + }, + { + "epoch": 0.10529917872506844, + "loss": 0.09625953435897827, + "loss_ce": 0.0062937140464782715, + "loss_iou": 0.337890625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 104465092, + "step": 1077 + }, + { + "epoch": 0.10539694955025421, + "grad_norm": 14.48481348176547, + "learning_rate": 5e-05, + "loss": 0.1058, + "num_input_tokens_seen": 104561708, + "step": 1078 + }, + { + "epoch": 0.10539694955025421, + "loss": 0.0829344242811203, + "loss_ce": 0.008196873590350151, + "loss_iou": 0.490234375, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 104561708, + "step": 1078 + }, + { + "epoch": 0.10549472037543997, + "grad_norm": 9.732932732253195, + "learning_rate": 5e-05, + "loss": 0.1004, + "num_input_tokens_seen": 104658304, + "step": 1079 + }, + { + "epoch": 0.10549472037543997, + "loss": 0.0976158082485199, + "loss_ce": 0.0028205832932144403, + "loss_iou": 0.35546875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 104658304, + "step": 1079 + }, + { + "epoch": 0.10559249120062573, + "grad_norm": 13.833656041640017, + "learning_rate": 5e-05, + "loss": 0.0956, + "num_input_tokens_seen": 104756276, + "step": 1080 + }, + { + "epoch": 0.10559249120062573, + "loss": 0.11973045021295547, + "loss_ce": 0.005136952269822359, + "loss_iou": 0.470703125, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 104756276, + "step": 1080 + }, + { + "epoch": 0.1056902620258115, + "grad_norm": 31.05194574268997, + "learning_rate": 5e-05, + "loss": 0.1209, + "num_input_tokens_seen": 104854232, + "step": 1081 + }, + { + "epoch": 0.1056902620258115, + "loss": 0.12976479530334473, + "loss_ce": 0.009769671596586704, + "loss_iou": 0.3984375, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 104854232, + "step": 1081 + }, + { + "epoch": 0.10578803285099726, + "grad_norm": 26.19481983846909, + "learning_rate": 5e-05, + "loss": 0.13, + "num_input_tokens_seen": 104951020, + "step": 1082 + }, + { + "epoch": 0.10578803285099726, + "loss": 0.11299841105937958, + "loss_ce": 0.007743287831544876, + "loss_iou": 0.419921875, + "loss_num": 0.02099609375, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 104951020, + "step": 1082 + }, + { + "epoch": 0.10588580367618303, + "grad_norm": 9.879399628657419, + "learning_rate": 5e-05, + "loss": 0.1096, + "num_input_tokens_seen": 105048716, + "step": 1083 + }, + { + "epoch": 0.10588580367618303, + "loss": 0.08978003263473511, + "loss_ce": 0.0033542548771947622, + "loss_iou": 0.423828125, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 105048716, + "step": 1083 + }, + { + "epoch": 0.10598357450136879, + "grad_norm": 7.556779001691072, + "learning_rate": 5e-05, + "loss": 0.1104, + "num_input_tokens_seen": 105145308, + "step": 1084 + }, + { + "epoch": 0.10598357450136879, + "loss": 0.11797576397657394, + "loss_ce": 0.006067809648811817, + "loss_iou": 0.55078125, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 105145308, + "step": 1084 + }, + { + "epoch": 0.10608134532655455, + "grad_norm": 11.33594158813834, + "learning_rate": 5e-05, + "loss": 0.1434, + "num_input_tokens_seen": 105242708, + "step": 1085 + }, + { + "epoch": 0.10608134532655455, + "loss": 0.15482133626937866, + "loss_ce": 0.009466102346777916, + "loss_iou": 0.4609375, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 105242708, + "step": 1085 + }, + { + "epoch": 0.10617911615174032, + "grad_norm": 21.325056426494392, + "learning_rate": 5e-05, + "loss": 0.0834, + "num_input_tokens_seen": 105339016, + "step": 1086 + }, + { + "epoch": 0.10617911615174032, + "loss": 0.10527803748846054, + "loss_ce": 0.007179289124906063, + "loss_iou": 0.419921875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 105339016, + "step": 1086 + }, + { + "epoch": 0.10627688697692608, + "grad_norm": 10.541830582158026, + "learning_rate": 5e-05, + "loss": 0.1354, + "num_input_tokens_seen": 105435720, + "step": 1087 + }, + { + "epoch": 0.10627688697692608, + "loss": 0.1045825332403183, + "loss_ce": 0.0032641789875924587, + "loss_iou": 0.443359375, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 105435720, + "step": 1087 + }, + { + "epoch": 0.10637465780211185, + "grad_norm": 11.400288953348374, + "learning_rate": 5e-05, + "loss": 0.1042, + "num_input_tokens_seen": 105532388, + "step": 1088 + }, + { + "epoch": 0.10637465780211185, + "loss": 0.11295205354690552, + "loss_ce": 0.007788483053445816, + "loss_iou": 0.37890625, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 105532388, + "step": 1088 + }, + { + "epoch": 0.10647242862729761, + "grad_norm": 6.345432344445618, + "learning_rate": 5e-05, + "loss": 0.122, + "num_input_tokens_seen": 105629272, + "step": 1089 + }, + { + "epoch": 0.10647242862729761, + "loss": 0.13258153200149536, + "loss_ce": 0.0034616501070559025, + "loss_iou": 0.3828125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 105629272, + "step": 1089 + }, + { + "epoch": 0.10657019945248337, + "grad_norm": 19.492258438781946, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 105726476, + "step": 1090 + }, + { + "epoch": 0.10657019945248337, + "loss": 0.07077377289533615, + "loss_ce": 0.0062901293858885765, + "loss_iou": 0.40234375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 105726476, + "step": 1090 + }, + { + "epoch": 0.10666797027766914, + "grad_norm": 3.589610286108694, + "learning_rate": 5e-05, + "loss": 0.135, + "num_input_tokens_seen": 105825020, + "step": 1091 + }, + { + "epoch": 0.10666797027766914, + "loss": 0.1439419388771057, + "loss_ce": 0.0075588831678032875, + "loss_iou": 0.458984375, + "loss_num": 0.0272216796875, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 105825020, + "step": 1091 + }, + { + "epoch": 0.1067657411028549, + "grad_norm": 7.30036031515459, + "learning_rate": 5e-05, + "loss": 0.1051, + "num_input_tokens_seen": 105922820, + "step": 1092 + }, + { + "epoch": 0.1067657411028549, + "loss": 0.11088962107896805, + "loss_ce": 0.00496310880407691, + "loss_iou": 0.359375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 105922820, + "step": 1092 + }, + { + "epoch": 0.10686351192804067, + "grad_norm": 2.335033001090154, + "learning_rate": 5e-05, + "loss": 0.1048, + "num_input_tokens_seen": 106019196, + "step": 1093 + }, + { + "epoch": 0.10686351192804067, + "loss": 0.08444218337535858, + "loss_ce": 0.00888829119503498, + "loss_iou": 0.1708984375, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 106019196, + "step": 1093 + }, + { + "epoch": 0.10696128275322643, + "grad_norm": 12.650167935719674, + "learning_rate": 5e-05, + "loss": 0.0944, + "num_input_tokens_seen": 106115788, + "step": 1094 + }, + { + "epoch": 0.10696128275322643, + "loss": 0.10634222626686096, + "loss_ce": 0.006275085732340813, + "loss_iou": 0.359375, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 106115788, + "step": 1094 + }, + { + "epoch": 0.10705905357841221, + "grad_norm": 12.54817596677724, + "learning_rate": 5e-05, + "loss": 0.1176, + "num_input_tokens_seen": 106212924, + "step": 1095 + }, + { + "epoch": 0.10705905357841221, + "loss": 0.11157229542732239, + "loss_ce": 0.006500271614640951, + "loss_iou": 0.42578125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 106212924, + "step": 1095 + }, + { + "epoch": 0.10715682440359797, + "grad_norm": 24.436441827562337, + "learning_rate": 5e-05, + "loss": 0.0986, + "num_input_tokens_seen": 106310124, + "step": 1096 + }, + { + "epoch": 0.10715682440359797, + "loss": 0.0973123162984848, + "loss_ce": 0.0034554966259747744, + "loss_iou": 0.3515625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 106310124, + "step": 1096 + }, + { + "epoch": 0.10725459522878374, + "grad_norm": 28.28565108300648, + "learning_rate": 5e-05, + "loss": 0.0956, + "num_input_tokens_seen": 106406684, + "step": 1097 + }, + { + "epoch": 0.10725459522878374, + "loss": 0.09936478734016418, + "loss_ce": 0.007049111649394035, + "loss_iou": 0.294921875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 106406684, + "step": 1097 + }, + { + "epoch": 0.1073523660539695, + "grad_norm": 13.96464490547207, + "learning_rate": 5e-05, + "loss": 0.0949, + "num_input_tokens_seen": 106502992, + "step": 1098 + }, + { + "epoch": 0.1073523660539695, + "loss": 0.07976873219013214, + "loss_ce": 0.006587577518075705, + "loss_iou": 0.31640625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 106502992, + "step": 1098 + }, + { + "epoch": 0.10745013687915526, + "grad_norm": 5.474757191644851, + "learning_rate": 5e-05, + "loss": 0.1181, + "num_input_tokens_seen": 106599860, + "step": 1099 + }, + { + "epoch": 0.10745013687915526, + "loss": 0.10252133756875992, + "loss_ce": 0.006970799062401056, + "loss_iou": 0.345703125, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 106599860, + "step": 1099 + }, + { + "epoch": 0.10754790770434103, + "grad_norm": 2.5834389188593616, + "learning_rate": 5e-05, + "loss": 0.1087, + "num_input_tokens_seen": 106696472, + "step": 1100 + }, + { + "epoch": 0.10754790770434103, + "loss": 0.12930244207382202, + "loss_ce": 0.004363478161394596, + "loss_iou": 0.302734375, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 106696472, + "step": 1100 + }, + { + "epoch": 0.10764567852952679, + "grad_norm": 4.43149809111327, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 106792992, + "step": 1101 + }, + { + "epoch": 0.10764567852952679, + "loss": 0.0877695232629776, + "loss_ce": 0.0038652513176202774, + "loss_iou": 0.322265625, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 106792992, + "step": 1101 + }, + { + "epoch": 0.10774344935471256, + "grad_norm": 17.864746340752127, + "learning_rate": 5e-05, + "loss": 0.1167, + "num_input_tokens_seen": 106890844, + "step": 1102 + }, + { + "epoch": 0.10774344935471256, + "loss": 0.07525596022605896, + "loss_ce": 0.0056148418225348, + "loss_iou": 0.45703125, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 106890844, + "step": 1102 + }, + { + "epoch": 0.10784122017989832, + "grad_norm": 14.295399512394578, + "learning_rate": 5e-05, + "loss": 0.1042, + "num_input_tokens_seen": 106986988, + "step": 1103 + }, + { + "epoch": 0.10784122017989832, + "loss": 0.10290919244289398, + "loss_ce": 0.0026894614566117525, + "loss_iou": 0.46875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 106986988, + "step": 1103 + }, + { + "epoch": 0.10793899100508408, + "grad_norm": 7.882415758773471, + "learning_rate": 5e-05, + "loss": 0.1463, + "num_input_tokens_seen": 107084716, + "step": 1104 + }, + { + "epoch": 0.10793899100508408, + "loss": 0.14299878478050232, + "loss_ce": 0.0063410792499780655, + "loss_iou": 0.337890625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 107084716, + "step": 1104 + }, + { + "epoch": 0.10803676183026985, + "grad_norm": 19.94677724748721, + "learning_rate": 5e-05, + "loss": 0.1103, + "num_input_tokens_seen": 107182456, + "step": 1105 + }, + { + "epoch": 0.10803676183026985, + "loss": 0.10743888467550278, + "loss_ce": 0.006181560456752777, + "loss_iou": 0.5078125, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 107182456, + "step": 1105 + }, + { + "epoch": 0.10813453265545561, + "grad_norm": 4.174837165029177, + "learning_rate": 5e-05, + "loss": 0.1131, + "num_input_tokens_seen": 107279340, + "step": 1106 + }, + { + "epoch": 0.10813453265545561, + "loss": 0.13830575346946716, + "loss_ce": 0.007293802220374346, + "loss_iou": 0.46484375, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 107279340, + "step": 1106 + }, + { + "epoch": 0.10823230348064138, + "grad_norm": 10.292848576159559, + "learning_rate": 5e-05, + "loss": 0.1047, + "num_input_tokens_seen": 107376424, + "step": 1107 + }, + { + "epoch": 0.10823230348064138, + "loss": 0.1043500080704689, + "loss_ce": 0.0012921446468681097, + "loss_iou": 0.37890625, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 107376424, + "step": 1107 + }, + { + "epoch": 0.10833007430582714, + "grad_norm": 10.298473801451761, + "learning_rate": 5e-05, + "loss": 0.154, + "num_input_tokens_seen": 107472020, + "step": 1108 + }, + { + "epoch": 0.10833007430582714, + "loss": 0.16684047877788544, + "loss_ce": 0.005097317509353161, + "loss_iou": 0.376953125, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 107472020, + "step": 1108 + }, + { + "epoch": 0.1084278451310129, + "grad_norm": 8.239875847175387, + "learning_rate": 5e-05, + "loss": 0.1306, + "num_input_tokens_seen": 107568688, + "step": 1109 + }, + { + "epoch": 0.1084278451310129, + "loss": 0.1111314445734024, + "loss_ce": 0.004548795521259308, + "loss_iou": 0.404296875, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 107568688, + "step": 1109 + }, + { + "epoch": 0.10852561595619867, + "grad_norm": 11.277863721426133, + "learning_rate": 5e-05, + "loss": 0.1011, + "num_input_tokens_seen": 107665160, + "step": 1110 + }, + { + "epoch": 0.10852561595619867, + "loss": 0.09753502160310745, + "loss_ce": 0.0063332365825772285, + "loss_iou": 0.46875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 107665160, + "step": 1110 + }, + { + "epoch": 0.10862338678138443, + "grad_norm": 16.680686628614264, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 107761376, + "step": 1111 + }, + { + "epoch": 0.10862338678138443, + "loss": 0.07084885239601135, + "loss_ce": 0.006761941127479076, + "loss_iou": 0.353515625, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 107761376, + "step": 1111 + }, + { + "epoch": 0.1087211576065702, + "grad_norm": 9.70957479282309, + "learning_rate": 5e-05, + "loss": 0.0862, + "num_input_tokens_seen": 107857624, + "step": 1112 + }, + { + "epoch": 0.1087211576065702, + "loss": 0.08534826338291168, + "loss_ce": 0.0019742408767342567, + "loss_iou": 0.361328125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 107857624, + "step": 1112 + }, + { + "epoch": 0.10881892843175596, + "grad_norm": 11.01349535340546, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 107954756, + "step": 1113 + }, + { + "epoch": 0.10881892843175596, + "loss": 0.0629655197262764, + "loss_ce": 0.008197907358407974, + "loss_iou": 0.283203125, + "loss_num": 0.010986328125, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 107954756, + "step": 1113 + }, + { + "epoch": 0.10891669925694172, + "grad_norm": 15.601085368570425, + "learning_rate": 5e-05, + "loss": 0.1462, + "num_input_tokens_seen": 108051240, + "step": 1114 + }, + { + "epoch": 0.10891669925694172, + "loss": 0.14101165533065796, + "loss_ce": 0.005635673180222511, + "loss_iou": 0.31640625, + "loss_num": 0.0269775390625, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 108051240, + "step": 1114 + }, + { + "epoch": 0.10901447008212749, + "grad_norm": 3.9154030860000053, + "learning_rate": 5e-05, + "loss": 0.1243, + "num_input_tokens_seen": 108149136, + "step": 1115 + }, + { + "epoch": 0.10901447008212749, + "loss": 0.11567261815071106, + "loss_ce": 0.006099254358559847, + "loss_iou": 0.40234375, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 108149136, + "step": 1115 + }, + { + "epoch": 0.10911224090731325, + "grad_norm": 10.767663827067093, + "learning_rate": 5e-05, + "loss": 0.1095, + "num_input_tokens_seen": 108245912, + "step": 1116 + }, + { + "epoch": 0.10911224090731325, + "loss": 0.12052641063928604, + "loss_ce": 0.009961232542991638, + "loss_iou": 0.357421875, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 108245912, + "step": 1116 + }, + { + "epoch": 0.10921001173249902, + "grad_norm": 18.786376308728, + "learning_rate": 5e-05, + "loss": 0.1308, + "num_input_tokens_seen": 108342068, + "step": 1117 + }, + { + "epoch": 0.10921001173249902, + "loss": 0.11935289204120636, + "loss_ce": 0.005339217372238636, + "loss_iou": 0.361328125, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 108342068, + "step": 1117 + }, + { + "epoch": 0.1093077825576848, + "grad_norm": 10.27037638814583, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 108438804, + "step": 1118 + }, + { + "epoch": 0.1093077825576848, + "loss": 0.09902255237102509, + "loss_ce": 0.003014253219589591, + "loss_iou": 0.39453125, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 108438804, + "step": 1118 + }, + { + "epoch": 0.10940555338287056, + "grad_norm": 6.231997950985389, + "learning_rate": 5e-05, + "loss": 0.1114, + "num_input_tokens_seen": 108535980, + "step": 1119 + }, + { + "epoch": 0.10940555338287056, + "loss": 0.09988903254270554, + "loss_ce": 0.006718868855386972, + "loss_iou": 0.50390625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 108535980, + "step": 1119 + }, + { + "epoch": 0.10950332420805632, + "grad_norm": 15.74685545438501, + "learning_rate": 5e-05, + "loss": 0.1094, + "num_input_tokens_seen": 108633084, + "step": 1120 + }, + { + "epoch": 0.10950332420805632, + "loss": 0.09043734520673752, + "loss_ce": 0.00419466570019722, + "loss_iou": 0.4140625, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 108633084, + "step": 1120 + }, + { + "epoch": 0.10960109503324209, + "grad_norm": 23.431115045251005, + "learning_rate": 5e-05, + "loss": 0.1275, + "num_input_tokens_seen": 108729644, + "step": 1121 + }, + { + "epoch": 0.10960109503324209, + "loss": 0.12352659553289413, + "loss_ce": 0.0060339197516441345, + "loss_iou": 0.37890625, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 108729644, + "step": 1121 + }, + { + "epoch": 0.10969886585842785, + "grad_norm": 21.18542568943438, + "learning_rate": 5e-05, + "loss": 0.1667, + "num_input_tokens_seen": 108826960, + "step": 1122 + }, + { + "epoch": 0.10969886585842785, + "loss": 0.12595024704933167, + "loss_ce": 0.0017589644994586706, + "loss_iou": 0.48046875, + "loss_num": 0.02490234375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 108826960, + "step": 1122 + }, + { + "epoch": 0.10979663668361361, + "grad_norm": 11.432924980887837, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 108923592, + "step": 1123 + }, + { + "epoch": 0.10979663668361361, + "loss": 0.06425324082374573, + "loss_ce": 0.002180487848818302, + "loss_iou": 0.318359375, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 108923592, + "step": 1123 + }, + { + "epoch": 0.10989440750879938, + "grad_norm": 7.1546910911532215, + "learning_rate": 5e-05, + "loss": 0.0743, + "num_input_tokens_seen": 109019540, + "step": 1124 + }, + { + "epoch": 0.10989440750879938, + "loss": 0.09144139289855957, + "loss_ce": 0.007228141650557518, + "loss_iou": 0.34765625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 109019540, + "step": 1124 + }, + { + "epoch": 0.10999217833398514, + "grad_norm": 13.364360991096977, + "learning_rate": 5e-05, + "loss": 0.1011, + "num_input_tokens_seen": 109116884, + "step": 1125 + }, + { + "epoch": 0.10999217833398514, + "loss": 0.10751120746135712, + "loss_ce": 0.006009742151945829, + "loss_iou": 0.296875, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 109116884, + "step": 1125 + }, + { + "epoch": 0.1100899491591709, + "grad_norm": 9.828461289381051, + "learning_rate": 5e-05, + "loss": 0.096, + "num_input_tokens_seen": 109214272, + "step": 1126 + }, + { + "epoch": 0.1100899491591709, + "loss": 0.11201644688844681, + "loss_ce": 0.00874495878815651, + "loss_iou": 0.3046875, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 109214272, + "step": 1126 + }, + { + "epoch": 0.11018771998435667, + "grad_norm": 2.4105440527204465, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 109310044, + "step": 1127 + }, + { + "epoch": 0.11018771998435667, + "loss": 0.08552089333534241, + "loss_ce": 0.010211135260760784, + "loss_iou": 0.25390625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 109310044, + "step": 1127 + }, + { + "epoch": 0.11028549080954243, + "grad_norm": 9.3616259931336, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 109406896, + "step": 1128 + }, + { + "epoch": 0.11028549080954243, + "loss": 0.054699596017599106, + "loss_ce": 0.005337412003427744, + "loss_iou": 0.421875, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 109406896, + "step": 1128 + }, + { + "epoch": 0.1103832616347282, + "grad_norm": 20.28157753819896, + "learning_rate": 5e-05, + "loss": 0.1315, + "num_input_tokens_seen": 109503052, + "step": 1129 + }, + { + "epoch": 0.1103832616347282, + "loss": 0.11608919501304626, + "loss_ce": 0.0017703501507639885, + "loss_iou": 0.380859375, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 109503052, + "step": 1129 + }, + { + "epoch": 0.11048103245991396, + "grad_norm": 6.350212766674091, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 109600716, + "step": 1130 + }, + { + "epoch": 0.11048103245991396, + "loss": 0.07422591745853424, + "loss_ce": 0.006263271439820528, + "loss_iou": 0.453125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 109600716, + "step": 1130 + }, + { + "epoch": 0.11057880328509972, + "grad_norm": 3.8094613810060065, + "learning_rate": 5e-05, + "loss": 0.1243, + "num_input_tokens_seen": 109697984, + "step": 1131 + }, + { + "epoch": 0.11057880328509972, + "loss": 0.1202152669429779, + "loss_ce": 0.0057438272051513195, + "loss_iou": 0.291015625, + "loss_num": 0.02294921875, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 109697984, + "step": 1131 + }, + { + "epoch": 0.11067657411028549, + "grad_norm": 15.287209585921927, + "learning_rate": 5e-05, + "loss": 0.1238, + "num_input_tokens_seen": 109795356, + "step": 1132 + }, + { + "epoch": 0.11067657411028549, + "loss": 0.09602990746498108, + "loss_ce": 0.0034548353869467974, + "loss_iou": 0.46484375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 109795356, + "step": 1132 + }, + { + "epoch": 0.11077434493547125, + "grad_norm": 9.802455088878217, + "learning_rate": 5e-05, + "loss": 0.116, + "num_input_tokens_seen": 109891764, + "step": 1133 + }, + { + "epoch": 0.11077434493547125, + "loss": 0.07166771590709686, + "loss_ce": 0.007153560407459736, + "loss_iou": 0.2890625, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 109891764, + "step": 1133 + }, + { + "epoch": 0.11087211576065702, + "grad_norm": 32.0814057661453, + "learning_rate": 5e-05, + "loss": 0.133, + "num_input_tokens_seen": 109988628, + "step": 1134 + }, + { + "epoch": 0.11087211576065702, + "loss": 0.15967994928359985, + "loss_ce": 0.012096925638616085, + "loss_iou": 0.359375, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 109988628, + "step": 1134 + }, + { + "epoch": 0.11096988658584278, + "grad_norm": 26.609628248508532, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 110085424, + "step": 1135 + }, + { + "epoch": 0.11096988658584278, + "loss": 0.061459578573703766, + "loss_ce": 0.00509361457079649, + "loss_iou": 0.279296875, + "loss_num": 0.01123046875, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 110085424, + "step": 1135 + }, + { + "epoch": 0.11106765741102854, + "grad_norm": 10.693997273061798, + "learning_rate": 5e-05, + "loss": 0.1162, + "num_input_tokens_seen": 110182172, + "step": 1136 + }, + { + "epoch": 0.11106765741102854, + "loss": 0.0795397013425827, + "loss_ce": 0.006541648879647255, + "loss_iou": 0.34765625, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 110182172, + "step": 1136 + }, + { + "epoch": 0.11116542823621431, + "grad_norm": 3.404817242187217, + "learning_rate": 5e-05, + "loss": 0.1271, + "num_input_tokens_seen": 110279268, + "step": 1137 + }, + { + "epoch": 0.11116542823621431, + "loss": 0.10068699717521667, + "loss_ce": 0.0066165560856461525, + "loss_iou": 0.30859375, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 110279268, + "step": 1137 + }, + { + "epoch": 0.11126319906140007, + "grad_norm": 5.1760499525493495, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 110375440, + "step": 1138 + }, + { + "epoch": 0.11126319906140007, + "loss": 0.10746248811483383, + "loss_ce": 0.004770837724208832, + "loss_iou": 0.2197265625, + "loss_num": 0.0206298828125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 110375440, + "step": 1138 + }, + { + "epoch": 0.11136096988658584, + "grad_norm": 18.86325228036966, + "learning_rate": 5e-05, + "loss": 0.1209, + "num_input_tokens_seen": 110472856, + "step": 1139 + }, + { + "epoch": 0.11136096988658584, + "loss": 0.16800816357135773, + "loss_ce": 0.007363626733422279, + "loss_iou": 0.373046875, + "loss_num": 0.0322265625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 110472856, + "step": 1139 + }, + { + "epoch": 0.1114587407117716, + "grad_norm": 40.41009151927238, + "learning_rate": 5e-05, + "loss": 0.1293, + "num_input_tokens_seen": 110569048, + "step": 1140 + }, + { + "epoch": 0.1114587407117716, + "loss": 0.09225885570049286, + "loss_ce": 0.003788395319133997, + "loss_iou": 0.34765625, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 110569048, + "step": 1140 + }, + { + "epoch": 0.11155651153695738, + "grad_norm": 16.209184879264058, + "learning_rate": 5e-05, + "loss": 0.1341, + "num_input_tokens_seen": 110666144, + "step": 1141 + }, + { + "epoch": 0.11155651153695738, + "loss": 0.11558592319488525, + "loss_ce": 0.005295392591506243, + "loss_iou": 0.46875, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 110666144, + "step": 1141 + }, + { + "epoch": 0.11165428236214314, + "grad_norm": 5.899101267817121, + "learning_rate": 5e-05, + "loss": 0.114, + "num_input_tokens_seen": 110762448, + "step": 1142 + }, + { + "epoch": 0.11165428236214314, + "loss": 0.08180175721645355, + "loss_ce": 0.005690912250429392, + "loss_iou": 0.380859375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 110762448, + "step": 1142 + }, + { + "epoch": 0.1117520531873289, + "grad_norm": 16.24268508849289, + "learning_rate": 5e-05, + "loss": 0.1136, + "num_input_tokens_seen": 110859960, + "step": 1143 + }, + { + "epoch": 0.1117520531873289, + "loss": 0.11212022602558136, + "loss_ce": 0.002195911016315222, + "loss_iou": 0.486328125, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 110859960, + "step": 1143 + }, + { + "epoch": 0.11184982401251467, + "grad_norm": 30.401391761404593, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 110956936, + "step": 1144 + }, + { + "epoch": 0.11184982401251467, + "loss": 0.07246354967355728, + "loss_ce": 0.004653489217162132, + "loss_iou": 0.34375, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 110956936, + "step": 1144 + }, + { + "epoch": 0.11194759483770043, + "grad_norm": 7.318941269589738, + "learning_rate": 5e-05, + "loss": 0.1288, + "num_input_tokens_seen": 111054060, + "step": 1145 + }, + { + "epoch": 0.11194759483770043, + "loss": 0.10376925021409988, + "loss_ce": 0.0032748631201684475, + "loss_iou": 0.46484375, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 111054060, + "step": 1145 + }, + { + "epoch": 0.1120453656628862, + "grad_norm": 4.691898019680607, + "learning_rate": 5e-05, + "loss": 0.1187, + "num_input_tokens_seen": 111150296, + "step": 1146 + }, + { + "epoch": 0.1120453656628862, + "loss": 0.14340540766716003, + "loss_ce": 0.007571677211672068, + "loss_iou": 0.330078125, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 111150296, + "step": 1146 + }, + { + "epoch": 0.11214313648807196, + "grad_norm": 20.64335183724696, + "learning_rate": 5e-05, + "loss": 0.13, + "num_input_tokens_seen": 111248020, + "step": 1147 + }, + { + "epoch": 0.11214313648807196, + "loss": 0.1613452285528183, + "loss_ce": 0.009642349556088448, + "loss_iou": 0.431640625, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 111248020, + "step": 1147 + }, + { + "epoch": 0.11224090731325773, + "grad_norm": 33.44596701597036, + "learning_rate": 5e-05, + "loss": 0.1639, + "num_input_tokens_seen": 111345748, + "step": 1148 + }, + { + "epoch": 0.11224090731325773, + "loss": 0.17271849513053894, + "loss_ce": 0.00859496183693409, + "loss_iou": 0.458984375, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 111345748, + "step": 1148 + }, + { + "epoch": 0.11233867813844349, + "grad_norm": 10.750840774237192, + "learning_rate": 5e-05, + "loss": 0.0994, + "num_input_tokens_seen": 111443200, + "step": 1149 + }, + { + "epoch": 0.11233867813844349, + "loss": 0.09880012273788452, + "loss_ce": 0.005752026103436947, + "loss_iou": 0.38671875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 111443200, + "step": 1149 + }, + { + "epoch": 0.11243644896362925, + "grad_norm": 11.25132386648193, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 111539544, + "step": 1150 + }, + { + "epoch": 0.11243644896362925, + "loss": 0.1008324921131134, + "loss_ce": 0.006662874948233366, + "loss_iou": 0.3046875, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 111539544, + "step": 1150 + }, + { + "epoch": 0.11253421978881502, + "grad_norm": 4.824025958821543, + "learning_rate": 5e-05, + "loss": 0.0817, + "num_input_tokens_seen": 111636764, + "step": 1151 + }, + { + "epoch": 0.11253421978881502, + "loss": 0.0702013149857521, + "loss_ce": 0.00429860595613718, + "loss_iou": 0.380859375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 111636764, + "step": 1151 + }, + { + "epoch": 0.11263199061400078, + "grad_norm": 5.310026424877841, + "learning_rate": 5e-05, + "loss": 0.1555, + "num_input_tokens_seen": 111734244, + "step": 1152 + }, + { + "epoch": 0.11263199061400078, + "loss": 0.16099946200847626, + "loss_ce": 0.014240427874028683, + "loss_iou": 0.390625, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 111734244, + "step": 1152 + }, + { + "epoch": 0.11272976143918655, + "grad_norm": 14.983616429167895, + "learning_rate": 5e-05, + "loss": 0.1107, + "num_input_tokens_seen": 111831312, + "step": 1153 + }, + { + "epoch": 0.11272976143918655, + "loss": 0.13295570015907288, + "loss_ce": 0.005117565393447876, + "loss_iou": 0.392578125, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 111831312, + "step": 1153 + }, + { + "epoch": 0.11282753226437231, + "grad_norm": 14.179515035464554, + "learning_rate": 5e-05, + "loss": 0.1111, + "num_input_tokens_seen": 111928124, + "step": 1154 + }, + { + "epoch": 0.11282753226437231, + "loss": 0.10252001136541367, + "loss_ce": 0.014019038528203964, + "loss_iou": 0.455078125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 111928124, + "step": 1154 + }, + { + "epoch": 0.11292530308955807, + "grad_norm": 18.679670555765778, + "learning_rate": 5e-05, + "loss": 0.1352, + "num_input_tokens_seen": 112025472, + "step": 1155 + }, + { + "epoch": 0.11292530308955807, + "loss": 0.08615998178720474, + "loss_ce": 0.005227362271398306, + "loss_iou": 0.515625, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 112025472, + "step": 1155 + }, + { + "epoch": 0.11302307391474384, + "grad_norm": 8.431609344916481, + "learning_rate": 5e-05, + "loss": 0.1127, + "num_input_tokens_seen": 112122428, + "step": 1156 + }, + { + "epoch": 0.11302307391474384, + "loss": 0.09352698177099228, + "loss_ce": 0.0036527132615447044, + "loss_iou": 0.48046875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 112122428, + "step": 1156 + }, + { + "epoch": 0.1131208447399296, + "grad_norm": 12.015405815738267, + "learning_rate": 5e-05, + "loss": 0.1252, + "num_input_tokens_seen": 112220748, + "step": 1157 + }, + { + "epoch": 0.1131208447399296, + "loss": 0.12016185373067856, + "loss_ce": 0.00540049746632576, + "loss_iou": 0.408203125, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 112220748, + "step": 1157 + }, + { + "epoch": 0.11321861556511537, + "grad_norm": 25.977987800211178, + "learning_rate": 5e-05, + "loss": 0.1721, + "num_input_tokens_seen": 112316640, + "step": 1158 + }, + { + "epoch": 0.11321861556511537, + "loss": 0.21848958730697632, + "loss_ce": 0.006758630275726318, + "loss_iou": 0.3828125, + "loss_num": 0.042236328125, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 112316640, + "step": 1158 + }, + { + "epoch": 0.11331638639030113, + "grad_norm": 6.3573067585919, + "learning_rate": 5e-05, + "loss": 0.1313, + "num_input_tokens_seen": 112413956, + "step": 1159 + }, + { + "epoch": 0.11331638639030113, + "loss": 0.1813259720802307, + "loss_ce": 0.007986120879650116, + "loss_iou": 0.33984375, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 112413956, + "step": 1159 + }, + { + "epoch": 0.1134141572154869, + "grad_norm": 9.606156723499279, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 112510876, + "step": 1160 + }, + { + "epoch": 0.1134141572154869, + "loss": 0.07794559001922607, + "loss_ce": 0.004520300775766373, + "loss_iou": 0.427734375, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 112510876, + "step": 1160 + }, + { + "epoch": 0.11351192804067266, + "grad_norm": 5.554750476228345, + "learning_rate": 5e-05, + "loss": 0.157, + "num_input_tokens_seen": 112607900, + "step": 1161 + }, + { + "epoch": 0.11351192804067266, + "loss": 0.17836952209472656, + "loss_ce": 0.008356109261512756, + "loss_iou": 0.49609375, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 112607900, + "step": 1161 + }, + { + "epoch": 0.11360969886585842, + "grad_norm": 30.97135971501427, + "learning_rate": 5e-05, + "loss": 0.1155, + "num_input_tokens_seen": 112705736, + "step": 1162 + }, + { + "epoch": 0.11360969886585842, + "loss": 0.11498059332370758, + "loss_ce": 0.007253535557538271, + "loss_iou": 0.482421875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 112705736, + "step": 1162 + }, + { + "epoch": 0.11370746969104419, + "grad_norm": 11.356437807454121, + "learning_rate": 5e-05, + "loss": 0.1699, + "num_input_tokens_seen": 112802252, + "step": 1163 + }, + { + "epoch": 0.11370746969104419, + "loss": 0.1938321590423584, + "loss_ce": 0.006362679414451122, + "loss_iou": 0.404296875, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 112802252, + "step": 1163 + }, + { + "epoch": 0.11380524051622996, + "grad_norm": 7.8802597114544275, + "learning_rate": 5e-05, + "loss": 0.1447, + "num_input_tokens_seen": 112899848, + "step": 1164 + }, + { + "epoch": 0.11380524051622996, + "loss": 0.12681283056735992, + "loss_ce": 0.0059327068738639355, + "loss_iou": 0.400390625, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 112899848, + "step": 1164 + }, + { + "epoch": 0.11390301134141573, + "grad_norm": 5.658268148126781, + "learning_rate": 5e-05, + "loss": 0.1279, + "num_input_tokens_seen": 112997412, + "step": 1165 + }, + { + "epoch": 0.11390301134141573, + "loss": 0.1843569278717041, + "loss_ce": 0.0053713275119662285, + "loss_iou": 0.494140625, + "loss_num": 0.035888671875, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 112997412, + "step": 1165 + }, + { + "epoch": 0.11400078216660149, + "grad_norm": 13.277988643872115, + "learning_rate": 5e-05, + "loss": 0.1138, + "num_input_tokens_seen": 113094536, + "step": 1166 + }, + { + "epoch": 0.11400078216660149, + "loss": 0.07440026849508286, + "loss_ce": 0.0033400901593267918, + "loss_iou": 0.412109375, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 113094536, + "step": 1166 + }, + { + "epoch": 0.11409855299178726, + "grad_norm": 6.815568533086914, + "learning_rate": 5e-05, + "loss": 0.1079, + "num_input_tokens_seen": 113190456, + "step": 1167 + }, + { + "epoch": 0.11409855299178726, + "loss": 0.0971897691488266, + "loss_ce": 0.006262641400098801, + "loss_iou": 0.255859375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 113190456, + "step": 1167 + }, + { + "epoch": 0.11419632381697302, + "grad_norm": 12.088365003594415, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 113287532, + "step": 1168 + }, + { + "epoch": 0.11419632381697302, + "loss": 0.09844723343849182, + "loss_ce": 0.0015539187006652355, + "loss_iou": 0.33984375, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 113287532, + "step": 1168 + }, + { + "epoch": 0.11429409464215878, + "grad_norm": 8.715472531665341, + "learning_rate": 5e-05, + "loss": 0.1351, + "num_input_tokens_seen": 113384868, + "step": 1169 + }, + { + "epoch": 0.11429409464215878, + "loss": 0.17301437258720398, + "loss_ce": 0.007914284244179726, + "loss_iou": 0.2734375, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 113384868, + "step": 1169 + }, + { + "epoch": 0.11439186546734455, + "grad_norm": 6.613608701150631, + "learning_rate": 5e-05, + "loss": 0.1059, + "num_input_tokens_seen": 113482996, + "step": 1170 + }, + { + "epoch": 0.11439186546734455, + "loss": 0.10807996988296509, + "loss_ce": 0.009691296145319939, + "loss_iou": 0.3671875, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 113482996, + "step": 1170 + }, + { + "epoch": 0.11448963629253031, + "grad_norm": 5.417755687916138, + "learning_rate": 5e-05, + "loss": 0.1215, + "num_input_tokens_seen": 113579544, + "step": 1171 + }, + { + "epoch": 0.11448963629253031, + "loss": 0.14301589131355286, + "loss_ce": 0.006281892769038677, + "loss_iou": 0.30078125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 113579544, + "step": 1171 + }, + { + "epoch": 0.11458740711771608, + "grad_norm": 14.032181151946643, + "learning_rate": 5e-05, + "loss": 0.1393, + "num_input_tokens_seen": 113676052, + "step": 1172 + }, + { + "epoch": 0.11458740711771608, + "loss": 0.11262063682079315, + "loss_ce": 0.001155185978859663, + "loss_iou": 0.40234375, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 113676052, + "step": 1172 + }, + { + "epoch": 0.11468517794290184, + "grad_norm": 13.807669598708234, + "learning_rate": 5e-05, + "loss": 0.1023, + "num_input_tokens_seen": 113773292, + "step": 1173 + }, + { + "epoch": 0.11468517794290184, + "loss": 0.1391674280166626, + "loss_ce": 0.008887888863682747, + "loss_iou": 0.400390625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 113773292, + "step": 1173 + }, + { + "epoch": 0.1147829487680876, + "grad_norm": 8.586258928904186, + "learning_rate": 5e-05, + "loss": 0.1086, + "num_input_tokens_seen": 113870476, + "step": 1174 + }, + { + "epoch": 0.1147829487680876, + "loss": 0.10519622266292572, + "loss_ce": 0.00442717969417572, + "loss_iou": 0.298828125, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 113870476, + "step": 1174 + }, + { + "epoch": 0.11488071959327337, + "grad_norm": 18.108153815733026, + "learning_rate": 5e-05, + "loss": 0.1221, + "num_input_tokens_seen": 113967796, + "step": 1175 + }, + { + "epoch": 0.11488071959327337, + "loss": 0.14492274820804596, + "loss_ce": 0.007471577264368534, + "loss_iou": 0.48046875, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 113967796, + "step": 1175 + }, + { + "epoch": 0.11497849041845913, + "grad_norm": 55.70609909487977, + "learning_rate": 5e-05, + "loss": 0.1016, + "num_input_tokens_seen": 114065784, + "step": 1176 + }, + { + "epoch": 0.11497849041845913, + "loss": 0.10357975959777832, + "loss_ce": 0.004122967831790447, + "loss_iou": 0.3984375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 114065784, + "step": 1176 + }, + { + "epoch": 0.1150762612436449, + "grad_norm": 12.856997967314662, + "learning_rate": 5e-05, + "loss": 0.1382, + "num_input_tokens_seen": 114163872, + "step": 1177 + }, + { + "epoch": 0.1150762612436449, + "loss": 0.15396836400032043, + "loss_ce": 0.005775002762675285, + "loss_iou": 0.419921875, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 114163872, + "step": 1177 + }, + { + "epoch": 0.11517403206883066, + "grad_norm": 13.466302439491542, + "learning_rate": 5e-05, + "loss": 0.1162, + "num_input_tokens_seen": 114261440, + "step": 1178 + }, + { + "epoch": 0.11517403206883066, + "loss": 0.11224067211151123, + "loss_ce": 0.004574651829898357, + "loss_iou": 0.35546875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 114261440, + "step": 1178 + }, + { + "epoch": 0.11527180289401642, + "grad_norm": 16.82238338493437, + "learning_rate": 5e-05, + "loss": 0.131, + "num_input_tokens_seen": 114359232, + "step": 1179 + }, + { + "epoch": 0.11527180289401642, + "loss": 0.11767332255840302, + "loss_ce": 0.012113017030060291, + "loss_iou": 0.455078125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 114359232, + "step": 1179 + }, + { + "epoch": 0.11536957371920219, + "grad_norm": 17.106319888486226, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 114456244, + "step": 1180 + }, + { + "epoch": 0.11536957371920219, + "loss": 0.0856657475233078, + "loss_ce": 0.0036344982217997313, + "loss_iou": 0.30078125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 114456244, + "step": 1180 + }, + { + "epoch": 0.11546734454438795, + "grad_norm": 10.938525913986346, + "learning_rate": 5e-05, + "loss": 0.1176, + "num_input_tokens_seen": 114553332, + "step": 1181 + }, + { + "epoch": 0.11546734454438795, + "loss": 0.11372549086809158, + "loss_ce": 0.006524864584207535, + "loss_iou": 0.263671875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 114553332, + "step": 1181 + }, + { + "epoch": 0.11556511536957371, + "grad_norm": 5.987621120606118, + "learning_rate": 5e-05, + "loss": 0.1339, + "num_input_tokens_seen": 114650104, + "step": 1182 + }, + { + "epoch": 0.11556511536957371, + "loss": 0.1329086422920227, + "loss_ce": 0.004368597641587257, + "loss_iou": 0.26953125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 114650104, + "step": 1182 + }, + { + "epoch": 0.11566288619475948, + "grad_norm": 3.59294568843427, + "learning_rate": 5e-05, + "loss": 0.0934, + "num_input_tokens_seen": 114746880, + "step": 1183 + }, + { + "epoch": 0.11566288619475948, + "loss": 0.09787857532501221, + "loss_ce": 0.004098054952919483, + "loss_iou": 0.302734375, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 114746880, + "step": 1183 + }, + { + "epoch": 0.11576065701994524, + "grad_norm": 4.412401860683066, + "learning_rate": 5e-05, + "loss": 0.1058, + "num_input_tokens_seen": 114842472, + "step": 1184 + }, + { + "epoch": 0.11576065701994524, + "loss": 0.08608623594045639, + "loss_ce": 0.0031089456751942635, + "loss_iou": 0.36328125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 114842472, + "step": 1184 + }, + { + "epoch": 0.115858427845131, + "grad_norm": 23.014393966025835, + "learning_rate": 5e-05, + "loss": 0.1149, + "num_input_tokens_seen": 114938988, + "step": 1185 + }, + { + "epoch": 0.115858427845131, + "loss": 0.14550426602363586, + "loss_ce": 0.0023768278770148754, + "loss_iou": 0.453125, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 114938988, + "step": 1185 + }, + { + "epoch": 0.11595619867031677, + "grad_norm": 26.277193344158995, + "learning_rate": 5e-05, + "loss": 0.1479, + "num_input_tokens_seen": 115034720, + "step": 1186 + }, + { + "epoch": 0.11595619867031677, + "loss": 0.16666902601718903, + "loss_ce": 0.00743592856451869, + "loss_iou": 0.283203125, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 115034720, + "step": 1186 + }, + { + "epoch": 0.11605396949550255, + "grad_norm": 8.971274747967625, + "learning_rate": 5e-05, + "loss": 0.1011, + "num_input_tokens_seen": 115132492, + "step": 1187 + }, + { + "epoch": 0.11605396949550255, + "loss": 0.12708601355552673, + "loss_ce": 0.006129581481218338, + "loss_iou": 0.431640625, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 115132492, + "step": 1187 + }, + { + "epoch": 0.11615174032068831, + "grad_norm": 5.9255998136323225, + "learning_rate": 5e-05, + "loss": 0.0873, + "num_input_tokens_seen": 115227528, + "step": 1188 + }, + { + "epoch": 0.11615174032068831, + "loss": 0.07180342823266983, + "loss_ce": 0.003234242554754019, + "loss_iou": 0.28125, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 115227528, + "step": 1188 + }, + { + "epoch": 0.11624951114587408, + "grad_norm": 17.453169982957096, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 115323776, + "step": 1189 + }, + { + "epoch": 0.11624951114587408, + "loss": 0.07396169006824493, + "loss_ce": 0.007311299443244934, + "loss_iou": 0.37890625, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 115323776, + "step": 1189 + }, + { + "epoch": 0.11634728197105984, + "grad_norm": 26.653478315712714, + "learning_rate": 5e-05, + "loss": 0.1967, + "num_input_tokens_seen": 115420968, + "step": 1190 + }, + { + "epoch": 0.11634728197105984, + "loss": 0.21555812656879425, + "loss_ce": 0.006146508734673262, + "loss_iou": 0.3203125, + "loss_num": 0.0419921875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 115420968, + "step": 1190 + }, + { + "epoch": 0.1164450527962456, + "grad_norm": 5.405668871442319, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 115518284, + "step": 1191 + }, + { + "epoch": 0.1164450527962456, + "loss": 0.09034286439418793, + "loss_ce": 0.009654391556978226, + "loss_iou": 0.36328125, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 115518284, + "step": 1191 + }, + { + "epoch": 0.11654282362143137, + "grad_norm": 3.950694786638003, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 115615556, + "step": 1192 + }, + { + "epoch": 0.11654282362143137, + "loss": 0.08635316789150238, + "loss_ce": 0.004443984944373369, + "loss_iou": 0.361328125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 115615556, + "step": 1192 + }, + { + "epoch": 0.11664059444661713, + "grad_norm": 8.880511192228097, + "learning_rate": 5e-05, + "loss": 0.1231, + "num_input_tokens_seen": 115711996, + "step": 1193 + }, + { + "epoch": 0.11664059444661713, + "loss": 0.0737478956580162, + "loss_ce": 0.0024893498048186302, + "loss_iou": 0.44140625, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 115711996, + "step": 1193 + }, + { + "epoch": 0.1167383652718029, + "grad_norm": 6.141065599991541, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 115808260, + "step": 1194 + }, + { + "epoch": 0.1167383652718029, + "loss": 0.12823031842708588, + "loss_ce": 0.006068442482501268, + "loss_iou": 0.376953125, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 115808260, + "step": 1194 + }, + { + "epoch": 0.11683613609698866, + "grad_norm": 17.432486193790748, + "learning_rate": 5e-05, + "loss": 0.1118, + "num_input_tokens_seen": 115905240, + "step": 1195 + }, + { + "epoch": 0.11683613609698866, + "loss": 0.12332884967327118, + "loss_ce": 0.00165526510681957, + "loss_iou": 0.56640625, + "loss_num": 0.0244140625, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 115905240, + "step": 1195 + }, + { + "epoch": 0.11693390692217442, + "grad_norm": 31.75148103548481, + "learning_rate": 5e-05, + "loss": 0.1298, + "num_input_tokens_seen": 116002128, + "step": 1196 + }, + { + "epoch": 0.11693390692217442, + "loss": 0.13567432761192322, + "loss_ce": 0.01015552505850792, + "loss_iou": 0.392578125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 116002128, + "step": 1196 + }, + { + "epoch": 0.11703167774736019, + "grad_norm": 7.198269864007056, + "learning_rate": 5e-05, + "loss": 0.1117, + "num_input_tokens_seen": 116099408, + "step": 1197 + }, + { + "epoch": 0.11703167774736019, + "loss": 0.09815014898777008, + "loss_ce": 0.006139654666185379, + "loss_iou": 0.384765625, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 116099408, + "step": 1197 + }, + { + "epoch": 0.11712944857254595, + "grad_norm": 20.774385589155177, + "learning_rate": 5e-05, + "loss": 0.1062, + "num_input_tokens_seen": 116196296, + "step": 1198 + }, + { + "epoch": 0.11712944857254595, + "loss": 0.10275450348854065, + "loss_ce": 0.003145132679492235, + "loss_iou": 0.462890625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 116196296, + "step": 1198 + }, + { + "epoch": 0.11722721939773172, + "grad_norm": 4.296689485494997, + "learning_rate": 5e-05, + "loss": 0.1297, + "num_input_tokens_seen": 116293380, + "step": 1199 + }, + { + "epoch": 0.11722721939773172, + "loss": 0.14829087257385254, + "loss_ce": 0.008856061846017838, + "loss_iou": 0.427734375, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 116293380, + "step": 1199 + }, + { + "epoch": 0.11732499022291748, + "grad_norm": 16.5072289033747, + "learning_rate": 5e-05, + "loss": 0.1616, + "num_input_tokens_seen": 116390612, + "step": 1200 + }, + { + "epoch": 0.11732499022291748, + "loss": 0.17571648955345154, + "loss_ce": 0.006466004066169262, + "loss_iou": 0.3125, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 116390612, + "step": 1200 + }, + { + "epoch": 0.11742276104810324, + "grad_norm": 11.350113631639621, + "learning_rate": 5e-05, + "loss": 0.1289, + "num_input_tokens_seen": 116487336, + "step": 1201 + }, + { + "epoch": 0.11742276104810324, + "loss": 0.1008194088935852, + "loss_ce": 0.004917918238788843, + "loss_iou": 0.35546875, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 116487336, + "step": 1201 + }, + { + "epoch": 0.11752053187328901, + "grad_norm": 8.76641542303407, + "learning_rate": 5e-05, + "loss": 0.1551, + "num_input_tokens_seen": 116584320, + "step": 1202 + }, + { + "epoch": 0.11752053187328901, + "loss": 0.14384685456752777, + "loss_ce": 0.009142259135842323, + "loss_iou": 0.423828125, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 116584320, + "step": 1202 + }, + { + "epoch": 0.11761830269847477, + "grad_norm": 16.63817372779811, + "learning_rate": 5e-05, + "loss": 0.0892, + "num_input_tokens_seen": 116681060, + "step": 1203 + }, + { + "epoch": 0.11761830269847477, + "loss": 0.08721575140953064, + "loss_ce": 0.003658620174974203, + "loss_iou": 0.388671875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 116681060, + "step": 1203 + }, + { + "epoch": 0.11771607352366054, + "grad_norm": 18.378316405368658, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 116778980, + "step": 1204 + }, + { + "epoch": 0.11771607352366054, + "loss": 0.058896731585264206, + "loss_ce": 0.0024849860928952694, + "loss_iou": 0.412109375, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 116778980, + "step": 1204 + }, + { + "epoch": 0.1178138443488463, + "grad_norm": 17.246022870014325, + "learning_rate": 5e-05, + "loss": 0.0979, + "num_input_tokens_seen": 116876024, + "step": 1205 + }, + { + "epoch": 0.1178138443488463, + "loss": 0.09726820141077042, + "loss_ce": 0.006966686807572842, + "loss_iou": 0.515625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 116876024, + "step": 1205 + }, + { + "epoch": 0.11791161517403206, + "grad_norm": 16.913649471287208, + "learning_rate": 5e-05, + "loss": 0.1346, + "num_input_tokens_seen": 116972092, + "step": 1206 + }, + { + "epoch": 0.11791161517403206, + "loss": 0.07992382347583771, + "loss_ce": 0.008222775533795357, + "loss_iou": 0.291015625, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 116972092, + "step": 1206 + }, + { + "epoch": 0.11800938599921783, + "grad_norm": 16.677161089420586, + "learning_rate": 5e-05, + "loss": 0.1055, + "num_input_tokens_seen": 117069128, + "step": 1207 + }, + { + "epoch": 0.11800938599921783, + "loss": 0.11241483688354492, + "loss_ce": 0.005984785966575146, + "loss_iou": 0.390625, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 117069128, + "step": 1207 + }, + { + "epoch": 0.11810715682440359, + "grad_norm": 10.902355017371876, + "learning_rate": 5e-05, + "loss": 0.1201, + "num_input_tokens_seen": 117166912, + "step": 1208 + }, + { + "epoch": 0.11810715682440359, + "loss": 0.12457028031349182, + "loss_ce": 0.008481411263346672, + "loss_iou": 0.306640625, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 117166912, + "step": 1208 + }, + { + "epoch": 0.11820492764958936, + "grad_norm": 18.014071000588466, + "learning_rate": 5e-05, + "loss": 0.1078, + "num_input_tokens_seen": 117264600, + "step": 1209 + }, + { + "epoch": 0.11820492764958936, + "loss": 0.10808759927749634, + "loss_ce": 0.010828081518411636, + "loss_iou": 0.419921875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 117264600, + "step": 1209 + }, + { + "epoch": 0.11830269847477513, + "grad_norm": 44.0441386869418, + "learning_rate": 5e-05, + "loss": 0.127, + "num_input_tokens_seen": 117361920, + "step": 1210 + }, + { + "epoch": 0.11830269847477513, + "loss": 0.11906065046787262, + "loss_ce": 0.005626811645925045, + "loss_iou": 0.4140625, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 117361920, + "step": 1210 + }, + { + "epoch": 0.1184004692999609, + "grad_norm": 10.796681478442133, + "learning_rate": 5e-05, + "loss": 0.1137, + "num_input_tokens_seen": 117458236, + "step": 1211 + }, + { + "epoch": 0.1184004692999609, + "loss": 0.12750253081321716, + "loss_ce": 0.006591884419322014, + "loss_iou": 0.400390625, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 117458236, + "step": 1211 + }, + { + "epoch": 0.11849824012514666, + "grad_norm": 5.908689839433929, + "learning_rate": 5e-05, + "loss": 0.1574, + "num_input_tokens_seen": 117555796, + "step": 1212 + }, + { + "epoch": 0.11849824012514666, + "loss": 0.188323512673378, + "loss_ce": 0.007110130041837692, + "loss_iou": 0.46484375, + "loss_num": 0.0361328125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 117555796, + "step": 1212 + }, + { + "epoch": 0.11859601095033243, + "grad_norm": 26.65756100185246, + "learning_rate": 5e-05, + "loss": 0.1254, + "num_input_tokens_seen": 117652700, + "step": 1213 + }, + { + "epoch": 0.11859601095033243, + "loss": 0.14860057830810547, + "loss_ce": 0.007090579718351364, + "loss_iou": 0.314453125, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 117652700, + "step": 1213 + }, + { + "epoch": 0.11869378177551819, + "grad_norm": 7.269745558833058, + "learning_rate": 5e-05, + "loss": 0.1281, + "num_input_tokens_seen": 117749316, + "step": 1214 + }, + { + "epoch": 0.11869378177551819, + "loss": 0.10574330389499664, + "loss_ce": 0.0051573654636740685, + "loss_iou": 0.41015625, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 117749316, + "step": 1214 + }, + { + "epoch": 0.11879155260070395, + "grad_norm": 7.59229234171667, + "learning_rate": 5e-05, + "loss": 0.1014, + "num_input_tokens_seen": 117846372, + "step": 1215 + }, + { + "epoch": 0.11879155260070395, + "loss": 0.08735843002796173, + "loss_ce": 0.0067920200526714325, + "loss_iou": 0.458984375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 117846372, + "step": 1215 + }, + { + "epoch": 0.11888932342588972, + "grad_norm": 5.733589015883469, + "learning_rate": 5e-05, + "loss": 0.1435, + "num_input_tokens_seen": 117943780, + "step": 1216 + }, + { + "epoch": 0.11888932342588972, + "loss": 0.11736347526311874, + "loss_ce": 0.010201000608503819, + "loss_iou": 0.423828125, + "loss_num": 0.021484375, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 117943780, + "step": 1216 + }, + { + "epoch": 0.11898709425107548, + "grad_norm": 7.436347008153752, + "learning_rate": 5e-05, + "loss": 0.0877, + "num_input_tokens_seen": 118042164, + "step": 1217 + }, + { + "epoch": 0.11898709425107548, + "loss": 0.07803300023078918, + "loss_ce": 0.00569108035415411, + "loss_iou": 0.447265625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 118042164, + "step": 1217 + }, + { + "epoch": 0.11908486507626125, + "grad_norm": 7.379216307133939, + "learning_rate": 5e-05, + "loss": 0.1242, + "num_input_tokens_seen": 118138420, + "step": 1218 + }, + { + "epoch": 0.11908486507626125, + "loss": 0.11738508194684982, + "loss_ce": 0.006758859846740961, + "loss_iou": 0.4375, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 118138420, + "step": 1218 + }, + { + "epoch": 0.11918263590144701, + "grad_norm": 3.748805767853414, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 118234504, + "step": 1219 + }, + { + "epoch": 0.11918263590144701, + "loss": 0.13669373095035553, + "loss_ce": 0.005056164693087339, + "loss_iou": 0.390625, + "loss_num": 0.0262451171875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 118234504, + "step": 1219 + }, + { + "epoch": 0.11928040672663277, + "grad_norm": 4.7121409615113645, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 118332928, + "step": 1220 + }, + { + "epoch": 0.11928040672663277, + "loss": 0.09707017987966537, + "loss_ce": 0.0114073371514678, + "loss_iou": 0.41796875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 118332928, + "step": 1220 + }, + { + "epoch": 0.11937817755181854, + "grad_norm": 2.7626281183754204, + "learning_rate": 5e-05, + "loss": 0.1344, + "num_input_tokens_seen": 118430928, + "step": 1221 + }, + { + "epoch": 0.11937817755181854, + "loss": 0.14242830872535706, + "loss_ce": 0.006197843700647354, + "loss_iou": 0.353515625, + "loss_num": 0.0272216796875, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 118430928, + "step": 1221 + }, + { + "epoch": 0.1194759483770043, + "grad_norm": 7.9446985902445455, + "learning_rate": 5e-05, + "loss": 0.1101, + "num_input_tokens_seen": 118528148, + "step": 1222 + }, + { + "epoch": 0.1194759483770043, + "loss": 0.09806307405233383, + "loss_ce": 0.007990440353751183, + "loss_iou": 0.388671875, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 118528148, + "step": 1222 + }, + { + "epoch": 0.11957371920219007, + "grad_norm": 9.647729871701577, + "learning_rate": 5e-05, + "loss": 0.1476, + "num_input_tokens_seen": 118623144, + "step": 1223 + }, + { + "epoch": 0.11957371920219007, + "loss": 0.0814853236079216, + "loss_ce": 0.0034747652243822813, + "loss_iou": 0.24609375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 118623144, + "step": 1223 + }, + { + "epoch": 0.11967149002737583, + "grad_norm": 20.077922306109155, + "learning_rate": 5e-05, + "loss": 0.1438, + "num_input_tokens_seen": 118719520, + "step": 1224 + }, + { + "epoch": 0.11967149002737583, + "loss": 0.16479934751987457, + "loss_ce": 0.010105747729539871, + "loss_iou": 0.314453125, + "loss_num": 0.031005859375, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 118719520, + "step": 1224 + }, + { + "epoch": 0.1197692608525616, + "grad_norm": 9.614140661197007, + "learning_rate": 5e-05, + "loss": 0.1516, + "num_input_tokens_seen": 118816612, + "step": 1225 + }, + { + "epoch": 0.1197692608525616, + "loss": 0.16696450114250183, + "loss_ce": 0.005236592143774033, + "loss_iou": 0.2734375, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 118816612, + "step": 1225 + }, + { + "epoch": 0.11986703167774736, + "grad_norm": 15.485869675865752, + "learning_rate": 5e-05, + "loss": 0.1405, + "num_input_tokens_seen": 118913100, + "step": 1226 + }, + { + "epoch": 0.11986703167774736, + "loss": 0.13091400265693665, + "loss_ce": 0.003960893955081701, + "loss_iou": 0.421875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 118913100, + "step": 1226 + }, + { + "epoch": 0.11996480250293312, + "grad_norm": 30.411581783028332, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 119009772, + "step": 1227 + }, + { + "epoch": 0.11996480250293312, + "loss": 0.09368491917848587, + "loss_ce": 0.003352888859808445, + "loss_iou": 0.3828125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 119009772, + "step": 1227 + }, + { + "epoch": 0.12006257332811889, + "grad_norm": 10.537600645580207, + "learning_rate": 5e-05, + "loss": 0.1216, + "num_input_tokens_seen": 119106760, + "step": 1228 + }, + { + "epoch": 0.12006257332811889, + "loss": 0.11883128434419632, + "loss_ce": 0.0038410527631640434, + "loss_iou": 0.4375, + "loss_num": 0.02294921875, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 119106760, + "step": 1228 + }, + { + "epoch": 0.12016034415330465, + "grad_norm": 11.535718162224045, + "learning_rate": 5e-05, + "loss": 0.0983, + "num_input_tokens_seen": 119203880, + "step": 1229 + }, + { + "epoch": 0.12016034415330465, + "loss": 0.08496113121509552, + "loss_ce": 0.002960401587188244, + "loss_iou": 0.3671875, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 119203880, + "step": 1229 + }, + { + "epoch": 0.12025811497849041, + "grad_norm": 4.492295943761103, + "learning_rate": 5e-05, + "loss": 0.1119, + "num_input_tokens_seen": 119300612, + "step": 1230 + }, + { + "epoch": 0.12025811497849041, + "loss": 0.11499559879302979, + "loss_ce": 0.0032402232754975557, + "loss_iou": 0.46484375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 119300612, + "step": 1230 + }, + { + "epoch": 0.12035588580367618, + "grad_norm": 3.593503833070329, + "learning_rate": 5e-05, + "loss": 0.1038, + "num_input_tokens_seen": 119397352, + "step": 1231 + }, + { + "epoch": 0.12035588580367618, + "loss": 0.09927739948034286, + "loss_ce": 0.005741019733250141, + "loss_iou": 0.435546875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 119397352, + "step": 1231 + }, + { + "epoch": 0.12045365662886194, + "grad_norm": 17.10307080024278, + "learning_rate": 5e-05, + "loss": 0.1002, + "num_input_tokens_seen": 119494804, + "step": 1232 + }, + { + "epoch": 0.12045365662886194, + "loss": 0.10368705540895462, + "loss_ce": 0.005939250811934471, + "loss_iou": 0.388671875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 119494804, + "step": 1232 + }, + { + "epoch": 0.12055142745404772, + "grad_norm": 27.651738664867036, + "learning_rate": 5e-05, + "loss": 0.1008, + "num_input_tokens_seen": 119591332, + "step": 1233 + }, + { + "epoch": 0.12055142745404772, + "loss": 0.13124527037143707, + "loss_ce": 0.011799471452832222, + "loss_iou": 0.462890625, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 119591332, + "step": 1233 + }, + { + "epoch": 0.12064919827923348, + "grad_norm": 16.530174954081378, + "learning_rate": 5e-05, + "loss": 0.1105, + "num_input_tokens_seen": 119688276, + "step": 1234 + }, + { + "epoch": 0.12064919827923348, + "loss": 0.10434708744287491, + "loss_ce": 0.003455973230302334, + "loss_iou": 0.431640625, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 119688276, + "step": 1234 + }, + { + "epoch": 0.12074696910441925, + "grad_norm": 15.54185557013907, + "learning_rate": 5e-05, + "loss": 0.1229, + "num_input_tokens_seen": 119785628, + "step": 1235 + }, + { + "epoch": 0.12074696910441925, + "loss": 0.11684556305408478, + "loss_ce": 0.008676004596054554, + "loss_iou": 0.408203125, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 119785628, + "step": 1235 + }, + { + "epoch": 0.12084473992960501, + "grad_norm": 5.5712717316324305, + "learning_rate": 5e-05, + "loss": 0.1468, + "num_input_tokens_seen": 119882712, + "step": 1236 + }, + { + "epoch": 0.12084473992960501, + "loss": 0.1503847986459732, + "loss_ce": 0.006280785892158747, + "loss_iou": 0.50390625, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 119882712, + "step": 1236 + }, + { + "epoch": 0.12094251075479077, + "grad_norm": 7.7349372791309445, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 119979632, + "step": 1237 + }, + { + "epoch": 0.12094251075479077, + "loss": 0.09782501310110092, + "loss_ce": 0.005936585366725922, + "loss_iou": 0.3984375, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 119979632, + "step": 1237 + }, + { + "epoch": 0.12104028157997654, + "grad_norm": 35.342548168357084, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 120076644, + "step": 1238 + }, + { + "epoch": 0.12104028157997654, + "loss": 0.08222398161888123, + "loss_ce": 0.0047093406319618225, + "loss_iou": 0.48046875, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 120076644, + "step": 1238 + }, + { + "epoch": 0.1211380524051623, + "grad_norm": 4.049892377885649, + "learning_rate": 5e-05, + "loss": 0.1452, + "num_input_tokens_seen": 120173512, + "step": 1239 + }, + { + "epoch": 0.1211380524051623, + "loss": 0.1589723527431488, + "loss_ce": 0.004828058648854494, + "loss_iou": 0.3359375, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 120173512, + "step": 1239 + }, + { + "epoch": 0.12123582323034807, + "grad_norm": 6.90058450133666, + "learning_rate": 5e-05, + "loss": 0.1254, + "num_input_tokens_seen": 120270964, + "step": 1240 + }, + { + "epoch": 0.12123582323034807, + "loss": 0.13986936211585999, + "loss_ce": 0.00739256339147687, + "loss_iou": 0.40234375, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 120270964, + "step": 1240 + }, + { + "epoch": 0.12133359405553383, + "grad_norm": 20.25578477226417, + "learning_rate": 5e-05, + "loss": 0.1245, + "num_input_tokens_seen": 120366612, + "step": 1241 + }, + { + "epoch": 0.12133359405553383, + "loss": 0.11704184114933014, + "loss_ce": 0.008704432286322117, + "loss_iou": 0.39453125, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 120366612, + "step": 1241 + }, + { + "epoch": 0.1214313648807196, + "grad_norm": 14.68214067324581, + "learning_rate": 5e-05, + "loss": 0.1144, + "num_input_tokens_seen": 120463364, + "step": 1242 + }, + { + "epoch": 0.1214313648807196, + "loss": 0.1488456428050995, + "loss_ce": 0.012706717476248741, + "loss_iou": 0.40234375, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 120463364, + "step": 1242 + }, + { + "epoch": 0.12152913570590536, + "grad_norm": 6.855591122658768, + "learning_rate": 5e-05, + "loss": 0.0918, + "num_input_tokens_seen": 120560376, + "step": 1243 + }, + { + "epoch": 0.12152913570590536, + "loss": 0.07372351735830307, + "loss_ce": 0.0014884085394442081, + "loss_iou": 0.462890625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 120560376, + "step": 1243 + }, + { + "epoch": 0.12162690653109112, + "grad_norm": 4.229552011901491, + "learning_rate": 5e-05, + "loss": 0.131, + "num_input_tokens_seen": 120657540, + "step": 1244 + }, + { + "epoch": 0.12162690653109112, + "loss": 0.14005041122436523, + "loss_ce": 0.004003036767244339, + "loss_iou": 0.404296875, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 120657540, + "step": 1244 + }, + { + "epoch": 0.12172467735627689, + "grad_norm": 4.34081803097856, + "learning_rate": 5e-05, + "loss": 0.1235, + "num_input_tokens_seen": 120753844, + "step": 1245 + }, + { + "epoch": 0.12172467735627689, + "loss": 0.12409910559654236, + "loss_ce": 0.005263646133244038, + "loss_iou": 0.275390625, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 120753844, + "step": 1245 + }, + { + "epoch": 0.12182244818146265, + "grad_norm": 7.876419999592174, + "learning_rate": 5e-05, + "loss": 0.1149, + "num_input_tokens_seen": 120850040, + "step": 1246 + }, + { + "epoch": 0.12182244818146265, + "loss": 0.12251804769039154, + "loss_ce": 0.005956160835921764, + "loss_iou": 0.416015625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 120850040, + "step": 1246 + }, + { + "epoch": 0.12192021900664841, + "grad_norm": 9.853370438010744, + "learning_rate": 5e-05, + "loss": 0.1136, + "num_input_tokens_seen": 120946092, + "step": 1247 + }, + { + "epoch": 0.12192021900664841, + "loss": 0.14217360317707062, + "loss_ce": 0.01434309221804142, + "loss_iou": 0.275390625, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 120946092, + "step": 1247 + }, + { + "epoch": 0.12201798983183418, + "grad_norm": 4.696737039606985, + "learning_rate": 5e-05, + "loss": 0.132, + "num_input_tokens_seen": 121042836, + "step": 1248 + }, + { + "epoch": 0.12201798983183418, + "loss": 0.08524046093225479, + "loss_ce": 0.004185774363577366, + "loss_iou": 0.408203125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 121042836, + "step": 1248 + }, + { + "epoch": 0.12211576065701994, + "grad_norm": 3.2921004759009307, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 121140276, + "step": 1249 + }, + { + "epoch": 0.12211576065701994, + "loss": 0.08438180387020111, + "loss_ce": 0.00987313687801361, + "loss_iou": 0.328125, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 121140276, + "step": 1249 + }, + { + "epoch": 0.1222135314822057, + "grad_norm": 12.646019260621838, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 121237252, + "step": 1250 + }, + { + "epoch": 0.1222135314822057, + "eval_seeclick_CIoU": 0.311751127243042, + "eval_seeclick_GIoU": 0.2982069328427315, + "eval_seeclick_IoU": 0.3899479955434799, + "eval_seeclick_MAE_all": 0.11601874232292175, + "eval_seeclick_MAE_h": 0.08094261586666107, + "eval_seeclick_MAE_w": 0.14344651252031326, + "eval_seeclick_MAE_x": 0.16322429478168488, + "eval_seeclick_MAE_y": 0.07646151259541512, + "eval_seeclick_NUM_probability": 0.9998312294483185, + "eval_seeclick_inside_bbox": 0.5539772808551788, + "eval_seeclick_loss": 0.372999370098114, + "eval_seeclick_loss_ce": 0.010338212363421917, + "eval_seeclick_loss_iou": 0.445068359375, + "eval_seeclick_loss_num": 0.06833648681640625, + "eval_seeclick_loss_xval": 0.34149169921875, + "eval_seeclick_runtime": 74.2288, + "eval_seeclick_samples_per_second": 0.579, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 121237252, + "step": 1250 + }, + { + "epoch": 0.1222135314822057, + "eval_icons_CIoU": 0.587596207857132, + "eval_icons_GIoU": 0.5935942530632019, + "eval_icons_IoU": 0.6215006411075592, + "eval_icons_MAE_all": 0.06812454015016556, + "eval_icons_MAE_h": 0.05634833872318268, + "eval_icons_MAE_w": 0.08151515945792198, + "eval_icons_MAE_x": 0.07955414615571499, + "eval_icons_MAE_y": 0.05508050695061684, + "eval_icons_NUM_probability": 0.9998791217803955, + "eval_icons_inside_bbox": 0.7795138955116272, + "eval_icons_loss": 0.2104247510433197, + "eval_icons_loss_ce": 4.786908539244905e-05, + "eval_icons_loss_iou": 0.33489990234375, + "eval_icons_loss_num": 0.04492950439453125, + "eval_icons_loss_xval": 0.22442626953125, + "eval_icons_runtime": 86.511, + "eval_icons_samples_per_second": 0.578, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 121237252, + "step": 1250 + }, + { + "epoch": 0.1222135314822057, + "eval_screenspot_CIoU": 0.2829638520876567, + "eval_screenspot_GIoU": 0.2582654158274333, + "eval_screenspot_IoU": 0.36076101660728455, + "eval_screenspot_MAE_all": 0.1641411433617274, + "eval_screenspot_MAE_h": 0.15018353362878165, + "eval_screenspot_MAE_w": 0.19124028086662292, + "eval_screenspot_MAE_x": 0.16402006645997366, + "eval_screenspot_MAE_y": 0.15112069249153137, + "eval_screenspot_NUM_probability": 0.99992569287618, + "eval_screenspot_inside_bbox": 0.5883333285649618, + "eval_screenspot_loss": 0.5645705461502075, + "eval_screenspot_loss_ce": 0.021755032241344452, + "eval_screenspot_loss_iou": 0.3218587239583333, + "eval_screenspot_loss_num": 0.10807291666666667, + "eval_screenspot_loss_xval": 0.5400797526041666, + "eval_screenspot_runtime": 145.9676, + "eval_screenspot_samples_per_second": 0.61, + "eval_screenspot_steps_per_second": 0.021, + "num_input_tokens_seen": 121237252, + "step": 1250 + }, + { + "epoch": 0.1222135314822057, + "eval_compot_CIoU": 0.4443059116601944, + "eval_compot_GIoU": 0.4315788298845291, + "eval_compot_IoU": 0.5162559002637863, + "eval_compot_MAE_all": 0.12249536067247391, + "eval_compot_MAE_h": 0.12343420833349228, + "eval_compot_MAE_w": 0.12685572355985641, + "eval_compot_MAE_x": 0.12162161991000175, + "eval_compot_MAE_y": 0.11806988343596458, + "eval_compot_NUM_probability": 0.9998907446861267, + "eval_compot_inside_bbox": 0.6736111044883728, + "eval_compot_loss": 0.39995911717414856, + "eval_compot_loss_ce": 0.0258528683334589, + "eval_compot_loss_iou": 0.4403076171875, + "eval_compot_loss_num": 0.07547760009765625, + "eval_compot_loss_xval": 0.3773193359375, + "eval_compot_runtime": 94.4124, + "eval_compot_samples_per_second": 0.53, + "eval_compot_steps_per_second": 0.021, + "num_input_tokens_seen": 121237252, + "step": 1250 + }, + { + "epoch": 0.1222135314822057, + "loss": 0.39777201414108276, + "loss_ce": 0.028387252241373062, + "loss_iou": 0.48046875, + "loss_num": 0.07373046875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 121237252, + "step": 1250 + }, + { + "epoch": 0.12231130230739147, + "grad_norm": 27.235720500642476, + "learning_rate": 5e-05, + "loss": 0.1451, + "num_input_tokens_seen": 121334240, + "step": 1251 + }, + { + "epoch": 0.12231130230739147, + "loss": 0.11875378340482712, + "loss_ce": 0.0033057795371860266, + "loss_iou": 0.46484375, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 121334240, + "step": 1251 + }, + { + "epoch": 0.12240907313257723, + "grad_norm": 17.99666790843522, + "learning_rate": 5e-05, + "loss": 0.1428, + "num_input_tokens_seen": 121431336, + "step": 1252 + }, + { + "epoch": 0.12240907313257723, + "loss": 0.16452626883983612, + "loss_ce": 0.004186913836747408, + "loss_iou": 0.51171875, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 121431336, + "step": 1252 + }, + { + "epoch": 0.122506843957763, + "grad_norm": 9.6588953126302, + "learning_rate": 5e-05, + "loss": 0.1077, + "num_input_tokens_seen": 121527968, + "step": 1253 + }, + { + "epoch": 0.122506843957763, + "loss": 0.1085212230682373, + "loss_ce": 0.004166360944509506, + "loss_iou": 0.369140625, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 121527968, + "step": 1253 + }, + { + "epoch": 0.12260461478294876, + "grad_norm": 13.54715978368195, + "learning_rate": 5e-05, + "loss": 0.1069, + "num_input_tokens_seen": 121624732, + "step": 1254 + }, + { + "epoch": 0.12260461478294876, + "loss": 0.1166151762008667, + "loss_ce": 0.010719181969761848, + "loss_iou": 0.38671875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 121624732, + "step": 1254 + }, + { + "epoch": 0.12270238560813453, + "grad_norm": 7.082355532316657, + "learning_rate": 5e-05, + "loss": 0.1102, + "num_input_tokens_seen": 121722272, + "step": 1255 + }, + { + "epoch": 0.12270238560813453, + "loss": 0.13402967154979706, + "loss_ce": 0.00848035141825676, + "loss_iou": 0.349609375, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 121722272, + "step": 1255 + }, + { + "epoch": 0.1228001564333203, + "grad_norm": 7.233110297347825, + "learning_rate": 5e-05, + "loss": 0.0971, + "num_input_tokens_seen": 121819588, + "step": 1256 + }, + { + "epoch": 0.1228001564333203, + "loss": 0.1029195711016655, + "loss_ce": 0.004286759998649359, + "loss_iou": 0.42578125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 121819588, + "step": 1256 + }, + { + "epoch": 0.12289792725850607, + "grad_norm": 12.36380035599354, + "learning_rate": 5e-05, + "loss": 0.1489, + "num_input_tokens_seen": 121916188, + "step": 1257 + }, + { + "epoch": 0.12289792725850607, + "loss": 0.1164700984954834, + "loss_ce": 0.0033719539642333984, + "loss_iou": 0.396484375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 121916188, + "step": 1257 + }, + { + "epoch": 0.12299569808369183, + "grad_norm": 12.069385603727431, + "learning_rate": 5e-05, + "loss": 0.1097, + "num_input_tokens_seen": 122013924, + "step": 1258 + }, + { + "epoch": 0.12299569808369183, + "loss": 0.1427038013935089, + "loss_ce": 0.011905461549758911, + "loss_iou": 0.3515625, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 122013924, + "step": 1258 + }, + { + "epoch": 0.1230934689088776, + "grad_norm": 8.627488637700568, + "learning_rate": 5e-05, + "loss": 0.1155, + "num_input_tokens_seen": 122110772, + "step": 1259 + }, + { + "epoch": 0.1230934689088776, + "loss": 0.11630471795797348, + "loss_ce": 0.0023978599347174168, + "loss_iou": 0.408203125, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 122110772, + "step": 1259 + }, + { + "epoch": 0.12319123973406336, + "grad_norm": 21.09456920099122, + "learning_rate": 5e-05, + "loss": 0.1124, + "num_input_tokens_seen": 122207812, + "step": 1260 + }, + { + "epoch": 0.12319123973406336, + "loss": 0.11464811116456985, + "loss_ce": 0.012566810473799706, + "loss_iou": 0.35546875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 122207812, + "step": 1260 + }, + { + "epoch": 0.12328901055924912, + "grad_norm": 26.84929395787998, + "learning_rate": 5e-05, + "loss": 0.1155, + "num_input_tokens_seen": 122304128, + "step": 1261 + }, + { + "epoch": 0.12328901055924912, + "loss": 0.0997338593006134, + "loss_ce": 0.0037560774944722652, + "loss_iou": 0.36328125, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 122304128, + "step": 1261 + }, + { + "epoch": 0.12338678138443489, + "grad_norm": 7.755034369100774, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 122400212, + "step": 1262 + }, + { + "epoch": 0.12338678138443489, + "loss": 0.11967238038778305, + "loss_ce": 0.0052619860507547855, + "loss_iou": 0.353515625, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 122400212, + "step": 1262 + }, + { + "epoch": 0.12348455220962065, + "grad_norm": 4.550426258576175, + "learning_rate": 5e-05, + "loss": 0.1419, + "num_input_tokens_seen": 122496984, + "step": 1263 + }, + { + "epoch": 0.12348455220962065, + "loss": 0.1050226241350174, + "loss_ce": 0.009578898549079895, + "loss_iou": 0.373046875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 122496984, + "step": 1263 + }, + { + "epoch": 0.12358232303480642, + "grad_norm": 12.422062438635779, + "learning_rate": 5e-05, + "loss": 0.1291, + "num_input_tokens_seen": 122594476, + "step": 1264 + }, + { + "epoch": 0.12358232303480642, + "loss": 0.12611499428749084, + "loss_ce": 0.006547118071466684, + "loss_iou": 0.396484375, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 122594476, + "step": 1264 + }, + { + "epoch": 0.12368009385999218, + "grad_norm": 6.893599223861397, + "learning_rate": 5e-05, + "loss": 0.1365, + "num_input_tokens_seen": 122691156, + "step": 1265 + }, + { + "epoch": 0.12368009385999218, + "loss": 0.1475374847650528, + "loss_ce": 0.003402961418032646, + "loss_iou": 0.40234375, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 122691156, + "step": 1265 + }, + { + "epoch": 0.12377786468517794, + "grad_norm": 9.980914210318128, + "learning_rate": 5e-05, + "loss": 0.0984, + "num_input_tokens_seen": 122787484, + "step": 1266 + }, + { + "epoch": 0.12377786468517794, + "loss": 0.07370427250862122, + "loss_ce": 0.007999931462109089, + "loss_iou": 0.32421875, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 122787484, + "step": 1266 + }, + { + "epoch": 0.12387563551036371, + "grad_norm": 13.496625186292658, + "learning_rate": 5e-05, + "loss": 0.1342, + "num_input_tokens_seen": 122884848, + "step": 1267 + }, + { + "epoch": 0.12387563551036371, + "loss": 0.13500890135765076, + "loss_ce": 0.005843266844749451, + "loss_iou": 0.26953125, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 122884848, + "step": 1267 + }, + { + "epoch": 0.12397340633554947, + "grad_norm": 8.47643155385872, + "learning_rate": 5e-05, + "loss": 0.1118, + "num_input_tokens_seen": 122981480, + "step": 1268 + }, + { + "epoch": 0.12397340633554947, + "loss": 0.10585089027881622, + "loss_ce": 0.004868225660175085, + "loss_iou": 0.416015625, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 122981480, + "step": 1268 + }, + { + "epoch": 0.12407117716073524, + "grad_norm": 4.461090348262064, + "learning_rate": 5e-05, + "loss": 0.1114, + "num_input_tokens_seen": 123078840, + "step": 1269 + }, + { + "epoch": 0.12407117716073524, + "loss": 0.15217633545398712, + "loss_ce": 0.00917096808552742, + "loss_iou": 0.291015625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 123078840, + "step": 1269 + }, + { + "epoch": 0.124168947985921, + "grad_norm": 9.774879413721987, + "learning_rate": 5e-05, + "loss": 0.1281, + "num_input_tokens_seen": 123175932, + "step": 1270 + }, + { + "epoch": 0.124168947985921, + "loss": 0.12899534404277802, + "loss_ce": 0.011594212613999844, + "loss_iou": 0.3515625, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 123175932, + "step": 1270 + }, + { + "epoch": 0.12426671881110676, + "grad_norm": 10.68166901285742, + "learning_rate": 5e-05, + "loss": 0.1364, + "num_input_tokens_seen": 123272312, + "step": 1271 + }, + { + "epoch": 0.12426671881110676, + "loss": 0.07917289435863495, + "loss_ce": 0.005533972755074501, + "loss_iou": 0.453125, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 123272312, + "step": 1271 + }, + { + "epoch": 0.12436448963629253, + "grad_norm": 6.547475775061602, + "learning_rate": 5e-05, + "loss": 0.1309, + "num_input_tokens_seen": 123368020, + "step": 1272 + }, + { + "epoch": 0.12436448963629253, + "loss": 0.08581987023353577, + "loss_ce": 0.003864908590912819, + "loss_iou": 0.267578125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 123368020, + "step": 1272 + }, + { + "epoch": 0.12446226046147829, + "grad_norm": 6.116506603032392, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 123465684, + "step": 1273 + }, + { + "epoch": 0.12446226046147829, + "loss": 0.12444374710321426, + "loss_ce": 0.00469276774674654, + "loss_iou": 0.4453125, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 123465684, + "step": 1273 + }, + { + "epoch": 0.12456003128666406, + "grad_norm": 27.934682311736967, + "learning_rate": 5e-05, + "loss": 0.1488, + "num_input_tokens_seen": 123562744, + "step": 1274 + }, + { + "epoch": 0.12456003128666406, + "loss": 0.15873637795448303, + "loss_ce": 0.003844405058771372, + "loss_iou": 0.54296875, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 123562744, + "step": 1274 + }, + { + "epoch": 0.12465780211184982, + "grad_norm": 5.713516446224595, + "learning_rate": 5e-05, + "loss": 0.1337, + "num_input_tokens_seen": 123660236, + "step": 1275 + }, + { + "epoch": 0.12465780211184982, + "loss": 0.1503257155418396, + "loss_ce": 0.0065116286277771, + "loss_iou": 0.380859375, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 123660236, + "step": 1275 + }, + { + "epoch": 0.12475557293703558, + "grad_norm": 6.355848132778398, + "learning_rate": 5e-05, + "loss": 0.1144, + "num_input_tokens_seen": 123756452, + "step": 1276 + }, + { + "epoch": 0.12475557293703558, + "loss": 0.11518871784210205, + "loss_ce": 0.005397912114858627, + "loss_iou": 0.349609375, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 123756452, + "step": 1276 + }, + { + "epoch": 0.12485334376222135, + "grad_norm": 22.165208152777684, + "learning_rate": 5e-05, + "loss": 0.1153, + "num_input_tokens_seen": 123854192, + "step": 1277 + }, + { + "epoch": 0.12485334376222135, + "loss": 0.11103098839521408, + "loss_ce": 0.004799303598701954, + "loss_iou": 0.41796875, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 123854192, + "step": 1277 + }, + { + "epoch": 0.12495111458740711, + "grad_norm": 17.872320574884988, + "learning_rate": 5e-05, + "loss": 0.1366, + "num_input_tokens_seen": 123951380, + "step": 1278 + }, + { + "epoch": 0.12495111458740711, + "loss": 0.0955573171377182, + "loss_ce": 0.0033179407473653555, + "loss_iou": 0.421875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 123951380, + "step": 1278 + }, + { + "epoch": 0.12504888541259288, + "grad_norm": 20.39204090637861, + "learning_rate": 5e-05, + "loss": 0.187, + "num_input_tokens_seen": 124049280, + "step": 1279 + }, + { + "epoch": 0.12504888541259288, + "loss": 0.25003308057785034, + "loss_ce": 0.008211774751543999, + "loss_iou": 0.4296875, + "loss_num": 0.04833984375, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 124049280, + "step": 1279 + }, + { + "epoch": 0.12514665623777865, + "grad_norm": 19.98656524183441, + "learning_rate": 5e-05, + "loss": 0.1115, + "num_input_tokens_seen": 124145100, + "step": 1280 + }, + { + "epoch": 0.12514665623777865, + "loss": 0.09886566549539566, + "loss_ce": 0.0052072168327867985, + "loss_iou": 0.26171875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 124145100, + "step": 1280 + }, + { + "epoch": 0.1252444270629644, + "grad_norm": 6.3441048392791295, + "learning_rate": 5e-05, + "loss": 0.1271, + "num_input_tokens_seen": 124241332, + "step": 1281 + }, + { + "epoch": 0.1252444270629644, + "loss": 0.08222349733114243, + "loss_ce": 0.006234726868569851, + "loss_iou": 0.28515625, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 124241332, + "step": 1281 + }, + { + "epoch": 0.12534219788815018, + "grad_norm": 18.475389416503322, + "learning_rate": 5e-05, + "loss": 0.1041, + "num_input_tokens_seen": 124338624, + "step": 1282 + }, + { + "epoch": 0.12534219788815018, + "loss": 0.11268745362758636, + "loss_ce": 0.01106391754001379, + "loss_iou": 0.326171875, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 124338624, + "step": 1282 + }, + { + "epoch": 0.12543996871333593, + "grad_norm": 39.3633166337758, + "learning_rate": 5e-05, + "loss": 0.1268, + "num_input_tokens_seen": 124436116, + "step": 1283 + }, + { + "epoch": 0.12543996871333593, + "loss": 0.1188974529504776, + "loss_ce": 0.007477769162505865, + "loss_iou": 0.38671875, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 124436116, + "step": 1283 + }, + { + "epoch": 0.1255377395385217, + "grad_norm": 17.06145112603138, + "learning_rate": 5e-05, + "loss": 0.1091, + "num_input_tokens_seen": 124533552, + "step": 1284 + }, + { + "epoch": 0.1255377395385217, + "loss": 0.07152772694826126, + "loss_ce": 0.004816301167011261, + "loss_iou": 0.384765625, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 124533552, + "step": 1284 + }, + { + "epoch": 0.12563551036370746, + "grad_norm": 25.247024706297733, + "learning_rate": 5e-05, + "loss": 0.1022, + "num_input_tokens_seen": 124630784, + "step": 1285 + }, + { + "epoch": 0.12563551036370746, + "loss": 0.10044772177934647, + "loss_ce": 0.007140228524804115, + "loss_iou": 0.376953125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 124630784, + "step": 1285 + }, + { + "epoch": 0.12573328118889324, + "grad_norm": 13.456305878821773, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 124727948, + "step": 1286 + }, + { + "epoch": 0.12573328118889324, + "loss": 0.09614834934473038, + "loss_ce": 0.007326940074563026, + "loss_iou": 0.384765625, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 124727948, + "step": 1286 + }, + { + "epoch": 0.125831052014079, + "grad_norm": 7.9003536521615505, + "learning_rate": 5e-05, + "loss": 0.1353, + "num_input_tokens_seen": 124824560, + "step": 1287 + }, + { + "epoch": 0.125831052014079, + "loss": 0.1365860253572464, + "loss_ce": 0.0025718999095261097, + "loss_iou": 0.39453125, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 124824560, + "step": 1287 + }, + { + "epoch": 0.12592882283926476, + "grad_norm": 5.796949381362213, + "learning_rate": 5e-05, + "loss": 0.1661, + "num_input_tokens_seen": 124922528, + "step": 1288 + }, + { + "epoch": 0.12592882283926476, + "loss": 0.11236700415611267, + "loss_ce": 0.004212707281112671, + "loss_iou": 0.400390625, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 124922528, + "step": 1288 + }, + { + "epoch": 0.12602659366445051, + "grad_norm": 4.451454852390654, + "learning_rate": 5e-05, + "loss": 0.1097, + "num_input_tokens_seen": 125019568, + "step": 1289 + }, + { + "epoch": 0.12602659366445051, + "loss": 0.10340578109025955, + "loss_ce": 0.004223654977977276, + "loss_iou": 0.32421875, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 125019568, + "step": 1289 + }, + { + "epoch": 0.1261243644896363, + "grad_norm": 8.02864710012319, + "learning_rate": 5e-05, + "loss": 0.1055, + "num_input_tokens_seen": 125116204, + "step": 1290 + }, + { + "epoch": 0.1261243644896363, + "loss": 0.08265772461891174, + "loss_ce": 0.0053338101133704185, + "loss_iou": 0.353515625, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 125116204, + "step": 1290 + }, + { + "epoch": 0.12622213531482207, + "grad_norm": 2.5984202394993647, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 125212640, + "step": 1291 + }, + { + "epoch": 0.12622213531482207, + "loss": 0.07618719339370728, + "loss_ce": 0.0047302767634391785, + "loss_iou": 0.35546875, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 125212640, + "step": 1291 + }, + { + "epoch": 0.12631990614000782, + "grad_norm": 3.6357402684846027, + "learning_rate": 5e-05, + "loss": 0.0929, + "num_input_tokens_seen": 125309672, + "step": 1292 + }, + { + "epoch": 0.12631990614000782, + "loss": 0.10759385675191879, + "loss_ce": 0.0059398021548986435, + "loss_iou": 0.33984375, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 125309672, + "step": 1292 + }, + { + "epoch": 0.1264176769651936, + "grad_norm": 13.297640177863533, + "learning_rate": 5e-05, + "loss": 0.099, + "num_input_tokens_seen": 125407828, + "step": 1293 + }, + { + "epoch": 0.1264176769651936, + "loss": 0.11138568073511124, + "loss_ce": 0.0014308432582765818, + "loss_iou": 0.32421875, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 125407828, + "step": 1293 + }, + { + "epoch": 0.12651544779037935, + "grad_norm": 27.190696372918456, + "learning_rate": 5e-05, + "loss": 0.1038, + "num_input_tokens_seen": 125504820, + "step": 1294 + }, + { + "epoch": 0.12651544779037935, + "loss": 0.08172129839658737, + "loss_ce": 0.002314560115337372, + "loss_iou": 0.35546875, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 125504820, + "step": 1294 + }, + { + "epoch": 0.12661321861556513, + "grad_norm": 10.4802472478667, + "learning_rate": 5e-05, + "loss": 0.1001, + "num_input_tokens_seen": 125601948, + "step": 1295 + }, + { + "epoch": 0.12661321861556513, + "loss": 0.10522618889808655, + "loss_ce": 0.003938345238566399, + "loss_iou": 0.3828125, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 125601948, + "step": 1295 + }, + { + "epoch": 0.12671098944075088, + "grad_norm": 5.114974355495881, + "learning_rate": 5e-05, + "loss": 0.1198, + "num_input_tokens_seen": 125698528, + "step": 1296 + }, + { + "epoch": 0.12671098944075088, + "loss": 0.09013420343399048, + "loss_ce": 0.002434315625578165, + "loss_iou": 0.291015625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 125698528, + "step": 1296 + }, + { + "epoch": 0.12680876026593665, + "grad_norm": 2.139236817438612, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 125795200, + "step": 1297 + }, + { + "epoch": 0.12680876026593665, + "loss": 0.06368601322174072, + "loss_ce": 0.002391457324847579, + "loss_iou": 0.365234375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 125795200, + "step": 1297 + }, + { + "epoch": 0.1269065310911224, + "grad_norm": 3.679470716102227, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 125892028, + "step": 1298 + }, + { + "epoch": 0.1269065310911224, + "loss": 0.06322696059942245, + "loss_ce": 0.005563997197896242, + "loss_iou": 0.404296875, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 125892028, + "step": 1298 + }, + { + "epoch": 0.12700430191630818, + "grad_norm": 16.00967868849005, + "learning_rate": 5e-05, + "loss": 0.134, + "num_input_tokens_seen": 125989308, + "step": 1299 + }, + { + "epoch": 0.12700430191630818, + "loss": 0.16008338332176208, + "loss_ce": 0.007068240083754063, + "loss_iou": 0.451171875, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 125989308, + "step": 1299 + }, + { + "epoch": 0.12710207274149393, + "grad_norm": 18.52458441692364, + "learning_rate": 5e-05, + "loss": 0.1357, + "num_input_tokens_seen": 126086340, + "step": 1300 + }, + { + "epoch": 0.12710207274149393, + "loss": 0.14280365407466888, + "loss_ce": 0.004070740193128586, + "loss_iou": 0.462890625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 126086340, + "step": 1300 + }, + { + "epoch": 0.1271998435666797, + "grad_norm": 13.182011844789578, + "learning_rate": 5e-05, + "loss": 0.1348, + "num_input_tokens_seen": 126183512, + "step": 1301 + }, + { + "epoch": 0.1271998435666797, + "loss": 0.12751725316047668, + "loss_ce": 0.0077357483096420765, + "loss_iou": 0.4921875, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 126183512, + "step": 1301 + }, + { + "epoch": 0.12729761439186546, + "grad_norm": 10.019506040198765, + "learning_rate": 5e-05, + "loss": 0.1263, + "num_input_tokens_seen": 126280148, + "step": 1302 + }, + { + "epoch": 0.12729761439186546, + "loss": 0.14347995817661285, + "loss_ce": 0.008897439576685429, + "loss_iou": 0.361328125, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 126280148, + "step": 1302 + }, + { + "epoch": 0.12739538521705124, + "grad_norm": 13.511492290137964, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 126377240, + "step": 1303 + }, + { + "epoch": 0.12739538521705124, + "loss": 0.0970664918422699, + "loss_ce": 0.0029502790421247482, + "loss_iou": 0.48828125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 126377240, + "step": 1303 + }, + { + "epoch": 0.127493156042237, + "grad_norm": 10.091428184170628, + "learning_rate": 5e-05, + "loss": 0.159, + "num_input_tokens_seen": 126472528, + "step": 1304 + }, + { + "epoch": 0.127493156042237, + "loss": 0.09579287469387054, + "loss_ce": 0.005918608512729406, + "loss_iou": 0.365234375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 126472528, + "step": 1304 + }, + { + "epoch": 0.12759092686742277, + "grad_norm": 8.284880310722626, + "learning_rate": 5e-05, + "loss": 0.1277, + "num_input_tokens_seen": 126569656, + "step": 1305 + }, + { + "epoch": 0.12759092686742277, + "loss": 0.10187986493110657, + "loss_ce": 0.005810528993606567, + "loss_iou": 0.337890625, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 126569656, + "step": 1305 + }, + { + "epoch": 0.12768869769260852, + "grad_norm": 29.904336757438404, + "learning_rate": 5e-05, + "loss": 0.1544, + "num_input_tokens_seen": 126667252, + "step": 1306 + }, + { + "epoch": 0.12768869769260852, + "loss": 0.13786743581295013, + "loss_ce": 0.006611323449760675, + "loss_iou": 0.443359375, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 126667252, + "step": 1306 + }, + { + "epoch": 0.1277864685177943, + "grad_norm": 5.869693493481423, + "learning_rate": 5e-05, + "loss": 0.109, + "num_input_tokens_seen": 126764072, + "step": 1307 + }, + { + "epoch": 0.1277864685177943, + "loss": 0.1010022982954979, + "loss_ce": 0.0021253442391753197, + "loss_iou": 0.435546875, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 126764072, + "step": 1307 + }, + { + "epoch": 0.12788423934298004, + "grad_norm": 3.249891431694302, + "learning_rate": 5e-05, + "loss": 0.1146, + "num_input_tokens_seen": 126861560, + "step": 1308 + }, + { + "epoch": 0.12788423934298004, + "loss": 0.09833826124668121, + "loss_ce": 0.0034972578287124634, + "loss_iou": 0.43359375, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 126861560, + "step": 1308 + }, + { + "epoch": 0.12798201016816582, + "grad_norm": 12.673584806368329, + "learning_rate": 5e-05, + "loss": 0.1598, + "num_input_tokens_seen": 126959316, + "step": 1309 + }, + { + "epoch": 0.12798201016816582, + "loss": 0.169511079788208, + "loss_ce": 0.0064556715078651905, + "loss_iou": 0.3671875, + "loss_num": 0.032470703125, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 126959316, + "step": 1309 + }, + { + "epoch": 0.12807978099335157, + "grad_norm": 8.735870615080783, + "learning_rate": 5e-05, + "loss": 0.1282, + "num_input_tokens_seen": 127056112, + "step": 1310 + }, + { + "epoch": 0.12807978099335157, + "loss": 0.14857268333435059, + "loss_ce": 0.005811450071632862, + "loss_iou": 0.3359375, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 127056112, + "step": 1310 + }, + { + "epoch": 0.12817755181853735, + "grad_norm": 24.041140040468505, + "learning_rate": 5e-05, + "loss": 0.1132, + "num_input_tokens_seen": 127153384, + "step": 1311 + }, + { + "epoch": 0.12817755181853735, + "loss": 0.12949198484420776, + "loss_ce": 0.00661296583712101, + "loss_iou": 0.396484375, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 127153384, + "step": 1311 + }, + { + "epoch": 0.1282753226437231, + "grad_norm": 8.898660633441503, + "learning_rate": 5e-05, + "loss": 0.094, + "num_input_tokens_seen": 127250432, + "step": 1312 + }, + { + "epoch": 0.1282753226437231, + "loss": 0.09839729219675064, + "loss_ce": 0.00705818273127079, + "loss_iou": 0.41796875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 127250432, + "step": 1312 + }, + { + "epoch": 0.12837309346890888, + "grad_norm": 5.562321213495246, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 127347476, + "step": 1313 + }, + { + "epoch": 0.12837309346890888, + "loss": 0.08762642741203308, + "loss_ce": 0.0026044552214443684, + "loss_iou": 0.353515625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 127347476, + "step": 1313 + }, + { + "epoch": 0.12847086429409466, + "grad_norm": 20.293841012578213, + "learning_rate": 5e-05, + "loss": 0.1507, + "num_input_tokens_seen": 127444224, + "step": 1314 + }, + { + "epoch": 0.12847086429409466, + "loss": 0.16148851811885834, + "loss_ce": 0.01100633479654789, + "loss_iou": 0.3125, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 127444224, + "step": 1314 + }, + { + "epoch": 0.1285686351192804, + "grad_norm": 9.612594540654927, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 127541124, + "step": 1315 + }, + { + "epoch": 0.1285686351192804, + "loss": 0.10410565137863159, + "loss_ce": 0.002787287812680006, + "loss_iou": 0.4765625, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 127541124, + "step": 1315 + }, + { + "epoch": 0.12866640594446618, + "grad_norm": 8.233653987346486, + "learning_rate": 5e-05, + "loss": 0.1223, + "num_input_tokens_seen": 127637960, + "step": 1316 + }, + { + "epoch": 0.12866640594446618, + "loss": 0.14794746041297913, + "loss_ce": 0.00659004133194685, + "loss_iou": 0.466796875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 127637960, + "step": 1316 + }, + { + "epoch": 0.12876417676965193, + "grad_norm": 8.055471118737366, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 127733664, + "step": 1317 + }, + { + "epoch": 0.12876417676965193, + "loss": 0.12465526908636093, + "loss_ce": 0.006277581676840782, + "loss_iou": 0.373046875, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 127733664, + "step": 1317 + }, + { + "epoch": 0.1288619475948377, + "grad_norm": 14.115289008328942, + "learning_rate": 5e-05, + "loss": 0.1176, + "num_input_tokens_seen": 127830584, + "step": 1318 + }, + { + "epoch": 0.1288619475948377, + "loss": 0.13161897659301758, + "loss_ce": 0.011990077793598175, + "loss_iou": 0.26953125, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 127830584, + "step": 1318 + }, + { + "epoch": 0.12895971842002346, + "grad_norm": 4.293087228902639, + "learning_rate": 5e-05, + "loss": 0.1398, + "num_input_tokens_seen": 127927520, + "step": 1319 + }, + { + "epoch": 0.12895971842002346, + "loss": 0.15428532660007477, + "loss_ce": 0.01845158264040947, + "loss_iou": 0.328125, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 127927520, + "step": 1319 + }, + { + "epoch": 0.12905748924520924, + "grad_norm": 9.428037217887084, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 128023608, + "step": 1320 + }, + { + "epoch": 0.12905748924520924, + "loss": 0.05169735103845596, + "loss_ce": 0.004212000407278538, + "loss_iou": 0.25, + "loss_num": 0.009521484375, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 128023608, + "step": 1320 + }, + { + "epoch": 0.129155260070395, + "grad_norm": 7.145788540164979, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 128120240, + "step": 1321 + }, + { + "epoch": 0.129155260070395, + "loss": 0.08933115750551224, + "loss_ce": 0.0048585012555122375, + "loss_iou": 0.3359375, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 128120240, + "step": 1321 + }, + { + "epoch": 0.12925303089558077, + "grad_norm": 6.107783983534137, + "learning_rate": 5e-05, + "loss": 0.1183, + "num_input_tokens_seen": 128216512, + "step": 1322 + }, + { + "epoch": 0.12925303089558077, + "loss": 0.1294219195842743, + "loss_ce": 0.007687306497246027, + "loss_iou": 0.298828125, + "loss_num": 0.0244140625, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 128216512, + "step": 1322 + }, + { + "epoch": 0.12935080172076652, + "grad_norm": 12.409918361799566, + "learning_rate": 5e-05, + "loss": 0.1043, + "num_input_tokens_seen": 128312580, + "step": 1323 + }, + { + "epoch": 0.12935080172076652, + "loss": 0.09823016822338104, + "loss_ce": 0.004770081024616957, + "loss_iou": 0.361328125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 128312580, + "step": 1323 + }, + { + "epoch": 0.1294485725459523, + "grad_norm": 31.036668949050686, + "learning_rate": 5e-05, + "loss": 0.1241, + "num_input_tokens_seen": 128410548, + "step": 1324 + }, + { + "epoch": 0.1294485725459523, + "loss": 0.1272813081741333, + "loss_ce": 0.0111924409866333, + "loss_iou": 0.3515625, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 128410548, + "step": 1324 + }, + { + "epoch": 0.12954634337113805, + "grad_norm": 14.886491851159482, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 128506584, + "step": 1325 + }, + { + "epoch": 0.12954634337113805, + "loss": 0.10205674171447754, + "loss_ce": 0.0029051261954009533, + "loss_iou": 0.453125, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 128506584, + "step": 1325 + }, + { + "epoch": 0.12964411419632382, + "grad_norm": 3.033383497380894, + "learning_rate": 5e-05, + "loss": 0.1069, + "num_input_tokens_seen": 128603092, + "step": 1326 + }, + { + "epoch": 0.12964411419632382, + "loss": 0.07119759172201157, + "loss_ce": 0.005462729837745428, + "loss_iou": 0.294921875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 128603092, + "step": 1326 + }, + { + "epoch": 0.12974188502150957, + "grad_norm": 21.876138508191676, + "learning_rate": 5e-05, + "loss": 0.1355, + "num_input_tokens_seen": 128699552, + "step": 1327 + }, + { + "epoch": 0.12974188502150957, + "loss": 0.14157167077064514, + "loss_ce": 0.009659434668719769, + "loss_iou": 0.291015625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 128699552, + "step": 1327 + }, + { + "epoch": 0.12983965584669535, + "grad_norm": 40.53254401755327, + "learning_rate": 5e-05, + "loss": 0.1202, + "num_input_tokens_seen": 128797076, + "step": 1328 + }, + { + "epoch": 0.12983965584669535, + "loss": 0.1433088779449463, + "loss_ce": 0.005430473480373621, + "loss_iou": 0.37890625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 128797076, + "step": 1328 + }, + { + "epoch": 0.1299374266718811, + "grad_norm": 5.148915244044327, + "learning_rate": 5e-05, + "loss": 0.1047, + "num_input_tokens_seen": 128893552, + "step": 1329 + }, + { + "epoch": 0.1299374266718811, + "loss": 0.1170312762260437, + "loss_ce": 0.008880793116986752, + "loss_iou": 0.400390625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 128893552, + "step": 1329 + }, + { + "epoch": 0.13003519749706688, + "grad_norm": 8.64087579644226, + "learning_rate": 5e-05, + "loss": 0.0986, + "num_input_tokens_seen": 128990996, + "step": 1330 + }, + { + "epoch": 0.13003519749706688, + "loss": 0.1012360006570816, + "loss_ce": 0.004190102219581604, + "loss_iou": 0.46875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 128990996, + "step": 1330 + }, + { + "epoch": 0.13013296832225263, + "grad_norm": 17.396255790233177, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 129087328, + "step": 1331 + }, + { + "epoch": 0.13013296832225263, + "loss": 0.07761421799659729, + "loss_ce": 0.003395464736968279, + "loss_iou": 0.22265625, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 129087328, + "step": 1331 + }, + { + "epoch": 0.1302307391474384, + "grad_norm": 18.401303223184424, + "learning_rate": 5e-05, + "loss": 0.1129, + "num_input_tokens_seen": 129184404, + "step": 1332 + }, + { + "epoch": 0.1302307391474384, + "loss": 0.08831409364938736, + "loss_ce": 0.0019188264850527048, + "loss_iou": 0.38671875, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 129184404, + "step": 1332 + }, + { + "epoch": 0.13032850997262416, + "grad_norm": 8.679716225653456, + "learning_rate": 5e-05, + "loss": 0.1215, + "num_input_tokens_seen": 129281504, + "step": 1333 + }, + { + "epoch": 0.13032850997262416, + "loss": 0.10430026799440384, + "loss_ce": 0.0032870867289602757, + "loss_iou": 0.4296875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 129281504, + "step": 1333 + }, + { + "epoch": 0.13042628079780993, + "grad_norm": 7.400024037302964, + "learning_rate": 5e-05, + "loss": 0.1207, + "num_input_tokens_seen": 129377268, + "step": 1334 + }, + { + "epoch": 0.13042628079780993, + "loss": 0.12361205369234085, + "loss_ce": 0.010620727203786373, + "loss_iou": 0.1787109375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 129377268, + "step": 1334 + }, + { + "epoch": 0.13052405162299568, + "grad_norm": 8.358246232237807, + "learning_rate": 5e-05, + "loss": 0.0954, + "num_input_tokens_seen": 129475064, + "step": 1335 + }, + { + "epoch": 0.13052405162299568, + "loss": 0.09097345918416977, + "loss_ce": 0.007523140870034695, + "loss_iou": 0.326171875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 129475064, + "step": 1335 + }, + { + "epoch": 0.13062182244818146, + "grad_norm": 2.7704397196613346, + "learning_rate": 5e-05, + "loss": 0.0937, + "num_input_tokens_seen": 129572532, + "step": 1336 + }, + { + "epoch": 0.13062182244818146, + "loss": 0.12595218420028687, + "loss_ce": 0.004675324074923992, + "loss_iou": 0.3046875, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 129572532, + "step": 1336 + }, + { + "epoch": 0.13071959327336724, + "grad_norm": 3.963130614564765, + "learning_rate": 5e-05, + "loss": 0.1239, + "num_input_tokens_seen": 129670204, + "step": 1337 + }, + { + "epoch": 0.13071959327336724, + "loss": 0.14567166566848755, + "loss_ce": 0.0026052568573504686, + "loss_iou": 0.388671875, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 129670204, + "step": 1337 + }, + { + "epoch": 0.130817364098553, + "grad_norm": 5.214943135352395, + "learning_rate": 5e-05, + "loss": 0.0989, + "num_input_tokens_seen": 129767104, + "step": 1338 + }, + { + "epoch": 0.130817364098553, + "loss": 0.08722630888223648, + "loss_ce": 0.0036996949929744005, + "loss_iou": 0.28125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 129767104, + "step": 1338 + }, + { + "epoch": 0.13091513492373877, + "grad_norm": 8.97392920874501, + "learning_rate": 5e-05, + "loss": 0.1174, + "num_input_tokens_seen": 129864024, + "step": 1339 + }, + { + "epoch": 0.13091513492373877, + "loss": 0.10418147593736649, + "loss_ce": 0.0022680240217596292, + "loss_iou": 0.427734375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 129864024, + "step": 1339 + }, + { + "epoch": 0.13101290574892452, + "grad_norm": 22.170778492837115, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 129960560, + "step": 1340 + }, + { + "epoch": 0.13101290574892452, + "loss": 0.10889993607997894, + "loss_ce": 0.004560342989861965, + "loss_iou": 0.36328125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 129960560, + "step": 1340 + }, + { + "epoch": 0.1311106765741103, + "grad_norm": 8.466527141376872, + "learning_rate": 5e-05, + "loss": 0.1288, + "num_input_tokens_seen": 130056748, + "step": 1341 + }, + { + "epoch": 0.1311106765741103, + "loss": 0.1384047567844391, + "loss_ce": 0.005470184609293938, + "loss_iou": 0.35546875, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 130056748, + "step": 1341 + }, + { + "epoch": 0.13120844739929605, + "grad_norm": 4.876110605656064, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 130153588, + "step": 1342 + }, + { + "epoch": 0.13120844739929605, + "loss": 0.09280601143836975, + "loss_ce": 0.005781317129731178, + "loss_iou": 0.30859375, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 130153588, + "step": 1342 + }, + { + "epoch": 0.13130621822448182, + "grad_norm": 25.727522210086534, + "learning_rate": 5e-05, + "loss": 0.1445, + "num_input_tokens_seen": 130250600, + "step": 1343 + }, + { + "epoch": 0.13130621822448182, + "loss": 0.12601588666439056, + "loss_ce": 0.001618600683286786, + "loss_iou": 0.455078125, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 130250600, + "step": 1343 + }, + { + "epoch": 0.13140398904966757, + "grad_norm": 42.565769254590236, + "learning_rate": 5e-05, + "loss": 0.152, + "num_input_tokens_seen": 130347300, + "step": 1344 + }, + { + "epoch": 0.13140398904966757, + "loss": 0.16425073146820068, + "loss_ce": 0.005040537100285292, + "loss_iou": 0.373046875, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 130347300, + "step": 1344 + }, + { + "epoch": 0.13150175987485335, + "grad_norm": 23.33143470303323, + "learning_rate": 5e-05, + "loss": 0.1118, + "num_input_tokens_seen": 130444820, + "step": 1345 + }, + { + "epoch": 0.13150175987485335, + "loss": 0.1241191178560257, + "loss_ce": 0.005924544762820005, + "loss_iou": 0.330078125, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 130444820, + "step": 1345 + }, + { + "epoch": 0.1315995307000391, + "grad_norm": 9.694474661617557, + "learning_rate": 5e-05, + "loss": 0.1128, + "num_input_tokens_seen": 130542204, + "step": 1346 + }, + { + "epoch": 0.1315995307000391, + "loss": 0.06899356842041016, + "loss_ce": 0.003716470208019018, + "loss_iou": 0.38671875, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 130542204, + "step": 1346 + }, + { + "epoch": 0.13169730152522488, + "grad_norm": 14.6602636404628, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 130638924, + "step": 1347 + }, + { + "epoch": 0.13169730152522488, + "loss": 0.08449061214923859, + "loss_ce": 0.009684395045042038, + "loss_iou": 0.30859375, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 130638924, + "step": 1347 + }, + { + "epoch": 0.13179507235041063, + "grad_norm": 3.1083736570285705, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 130736308, + "step": 1348 + }, + { + "epoch": 0.13179507235041063, + "loss": 0.06484455615282059, + "loss_ce": 0.011385385878384113, + "loss_iou": 0.341796875, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 130736308, + "step": 1348 + }, + { + "epoch": 0.1318928431755964, + "grad_norm": 13.580730387441985, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 130832620, + "step": 1349 + }, + { + "epoch": 0.1318928431755964, + "loss": 0.0667007565498352, + "loss_ce": 0.0014236627612262964, + "loss_iou": 0.3046875, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 130832620, + "step": 1349 + }, + { + "epoch": 0.13199061400078216, + "grad_norm": 13.16962559692755, + "learning_rate": 5e-05, + "loss": 0.0961, + "num_input_tokens_seen": 130929316, + "step": 1350 + }, + { + "epoch": 0.13199061400078216, + "loss": 0.058228496462106705, + "loss_ce": 0.003922466188669205, + "loss_iou": 0.2734375, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 130929316, + "step": 1350 + }, + { + "epoch": 0.13208838482596794, + "grad_norm": 3.5906283957557084, + "learning_rate": 5e-05, + "loss": 0.106, + "num_input_tokens_seen": 131026340, + "step": 1351 + }, + { + "epoch": 0.13208838482596794, + "loss": 0.07335493713617325, + "loss_ce": 0.003790117334574461, + "loss_iou": 0.380859375, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 131026340, + "step": 1351 + }, + { + "epoch": 0.1321861556511537, + "grad_norm": 5.621719672781856, + "learning_rate": 5e-05, + "loss": 0.1139, + "num_input_tokens_seen": 131123628, + "step": 1352 + }, + { + "epoch": 0.1321861556511537, + "loss": 0.10961795598268509, + "loss_ce": 0.006743197795003653, + "loss_iou": 0.478515625, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 131123628, + "step": 1352 + }, + { + "epoch": 0.13228392647633946, + "grad_norm": 10.899216100820674, + "learning_rate": 5e-05, + "loss": 0.1359, + "num_input_tokens_seen": 131220452, + "step": 1353 + }, + { + "epoch": 0.13228392647633946, + "loss": 0.12025612592697144, + "loss_ce": 0.002946561900898814, + "loss_iou": 0.373046875, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 131220452, + "step": 1353 + }, + { + "epoch": 0.13238169730152521, + "grad_norm": 17.461699504053602, + "learning_rate": 5e-05, + "loss": 0.1136, + "num_input_tokens_seen": 131317988, + "step": 1354 + }, + { + "epoch": 0.13238169730152521, + "loss": 0.11568476259708405, + "loss_ce": 0.003860731841996312, + "loss_iou": 0.291015625, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 131317988, + "step": 1354 + }, + { + "epoch": 0.132479468126711, + "grad_norm": 6.78343608412512, + "learning_rate": 5e-05, + "loss": 0.1161, + "num_input_tokens_seen": 131416088, + "step": 1355 + }, + { + "epoch": 0.132479468126711, + "loss": 0.0731949731707573, + "loss_ce": 0.005568020511418581, + "loss_iou": 0.390625, + "loss_num": 0.0135498046875, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 131416088, + "step": 1355 + }, + { + "epoch": 0.13257723895189674, + "grad_norm": 8.645896170920006, + "learning_rate": 5e-05, + "loss": 0.1072, + "num_input_tokens_seen": 131513660, + "step": 1356 + }, + { + "epoch": 0.13257723895189674, + "loss": 0.12784510850906372, + "loss_ce": 0.0063241166062653065, + "loss_iou": 0.421875, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 131513660, + "step": 1356 + }, + { + "epoch": 0.13267500977708252, + "grad_norm": 12.897071150171378, + "learning_rate": 5e-05, + "loss": 0.1314, + "num_input_tokens_seen": 131610936, + "step": 1357 + }, + { + "epoch": 0.13267500977708252, + "loss": 0.10658811032772064, + "loss_ce": 0.007283909246325493, + "loss_iou": 0.41796875, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 131610936, + "step": 1357 + }, + { + "epoch": 0.13277278060226827, + "grad_norm": 26.475170895882187, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 131708280, + "step": 1358 + }, + { + "epoch": 0.13277278060226827, + "loss": 0.0808618813753128, + "loss_ce": 0.0023401607759296894, + "loss_iou": 0.5078125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 131708280, + "step": 1358 + }, + { + "epoch": 0.13287055142745405, + "grad_norm": 19.228333046647837, + "learning_rate": 5e-05, + "loss": 0.1338, + "num_input_tokens_seen": 131805044, + "step": 1359 + }, + { + "epoch": 0.13287055142745405, + "loss": 0.1221783235669136, + "loss_ce": 0.0035564906429499388, + "loss_iou": 0.412109375, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 131805044, + "step": 1359 + }, + { + "epoch": 0.13296832225263983, + "grad_norm": 32.334487056192614, + "learning_rate": 5e-05, + "loss": 0.1262, + "num_input_tokens_seen": 131901876, + "step": 1360 + }, + { + "epoch": 0.13296832225263983, + "loss": 0.1384814977645874, + "loss_ce": 0.006279350258409977, + "loss_iou": 0.46875, + "loss_num": 0.0264892578125, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 131901876, + "step": 1360 + }, + { + "epoch": 0.13306609307782558, + "grad_norm": 16.307736064024915, + "learning_rate": 5e-05, + "loss": 0.1334, + "num_input_tokens_seen": 131999432, + "step": 1361 + }, + { + "epoch": 0.13306609307782558, + "loss": 0.12410581856966019, + "loss_ce": 0.005056748166680336, + "loss_iou": 0.4296875, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 131999432, + "step": 1361 + }, + { + "epoch": 0.13316386390301135, + "grad_norm": 10.108767465543743, + "learning_rate": 5e-05, + "loss": 0.1113, + "num_input_tokens_seen": 132096196, + "step": 1362 + }, + { + "epoch": 0.13316386390301135, + "loss": 0.12090107053518295, + "loss_ce": 0.0038127489387989044, + "loss_iou": 0.33203125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 132096196, + "step": 1362 + }, + { + "epoch": 0.1332616347281971, + "grad_norm": 12.254093728850505, + "learning_rate": 5e-05, + "loss": 0.0717, + "num_input_tokens_seen": 132193860, + "step": 1363 + }, + { + "epoch": 0.1332616347281971, + "loss": 0.07408260554075241, + "loss_ce": 0.006501425988972187, + "loss_iou": 0.361328125, + "loss_num": 0.0135498046875, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 132193860, + "step": 1363 + }, + { + "epoch": 0.13335940555338288, + "grad_norm": 7.75770733663028, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 132290868, + "step": 1364 + }, + { + "epoch": 0.13335940555338288, + "loss": 0.09829766303300858, + "loss_ce": 0.0063482001423835754, + "loss_iou": 0.494140625, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 132290868, + "step": 1364 + }, + { + "epoch": 0.13345717637856863, + "grad_norm": 16.736678555416486, + "learning_rate": 5e-05, + "loss": 0.1376, + "num_input_tokens_seen": 132388016, + "step": 1365 + }, + { + "epoch": 0.13345717637856863, + "loss": 0.11786645650863647, + "loss_ce": 0.0036391685716807842, + "loss_iou": 0.515625, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 132388016, + "step": 1365 + }, + { + "epoch": 0.1335549472037544, + "grad_norm": 8.35747838475436, + "learning_rate": 5e-05, + "loss": 0.1591, + "num_input_tokens_seen": 132486004, + "step": 1366 + }, + { + "epoch": 0.1335549472037544, + "loss": 0.14149336516857147, + "loss_ce": 0.004026935435831547, + "loss_iou": 0.43359375, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 132486004, + "step": 1366 + }, + { + "epoch": 0.13365271802894016, + "grad_norm": 3.2640256518270743, + "learning_rate": 5e-05, + "loss": 0.0992, + "num_input_tokens_seen": 132582216, + "step": 1367 + }, + { + "epoch": 0.13365271802894016, + "loss": 0.12167826294898987, + "loss_ce": 0.006260784342885017, + "loss_iou": 0.36328125, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 132582216, + "step": 1367 + }, + { + "epoch": 0.13375048885412594, + "grad_norm": 21.30396884742411, + "learning_rate": 5e-05, + "loss": 0.0968, + "num_input_tokens_seen": 132678888, + "step": 1368 + }, + { + "epoch": 0.13375048885412594, + "loss": 0.10819979012012482, + "loss_ce": 0.0038144183345139027, + "loss_iou": 0.27734375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 132678888, + "step": 1368 + }, + { + "epoch": 0.1338482596793117, + "grad_norm": 9.62338359811094, + "learning_rate": 5e-05, + "loss": 0.1225, + "num_input_tokens_seen": 132777080, + "step": 1369 + }, + { + "epoch": 0.1338482596793117, + "loss": 0.12400762736797333, + "loss_ce": 0.008178160525858402, + "loss_iou": 0.337890625, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 132777080, + "step": 1369 + }, + { + "epoch": 0.13394603050449747, + "grad_norm": 13.410576236614222, + "learning_rate": 5e-05, + "loss": 0.1249, + "num_input_tokens_seen": 132874068, + "step": 1370 + }, + { + "epoch": 0.13394603050449747, + "loss": 0.11253650486469269, + "loss_ce": 0.007342418190091848, + "loss_iou": 0.41015625, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 132874068, + "step": 1370 + }, + { + "epoch": 0.13404380132968322, + "grad_norm": 2.849940891708424, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 132971212, + "step": 1371 + }, + { + "epoch": 0.13404380132968322, + "loss": 0.09991715848445892, + "loss_ce": 0.0050418199971318245, + "loss_iou": 0.451171875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 132971212, + "step": 1371 + }, + { + "epoch": 0.134141572154869, + "grad_norm": 10.482381299784489, + "learning_rate": 5e-05, + "loss": 0.1192, + "num_input_tokens_seen": 133068356, + "step": 1372 + }, + { + "epoch": 0.134141572154869, + "loss": 0.10729990154504776, + "loss_ce": 0.003021334297955036, + "loss_iou": 0.40234375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 133068356, + "step": 1372 + }, + { + "epoch": 0.13423934298005474, + "grad_norm": 20.16359617855261, + "learning_rate": 5e-05, + "loss": 0.1332, + "num_input_tokens_seen": 133165664, + "step": 1373 + }, + { + "epoch": 0.13423934298005474, + "loss": 0.12906017899513245, + "loss_ce": 0.00677624111995101, + "loss_iou": 0.40625, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 133165664, + "step": 1373 + }, + { + "epoch": 0.13433711380524052, + "grad_norm": 15.973573119625453, + "learning_rate": 5e-05, + "loss": 0.1494, + "num_input_tokens_seen": 133263068, + "step": 1374 + }, + { + "epoch": 0.13433711380524052, + "loss": 0.16616925597190857, + "loss_ce": 0.005692576989531517, + "loss_iou": 0.353515625, + "loss_num": 0.0322265625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 133263068, + "step": 1374 + }, + { + "epoch": 0.13443488463042627, + "grad_norm": 15.00904972788491, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 133359380, + "step": 1375 + }, + { + "epoch": 0.13443488463042627, + "loss": 0.07857222855091095, + "loss_ce": 0.00742049515247345, + "loss_iou": 0.30078125, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 133359380, + "step": 1375 + }, + { + "epoch": 0.13453265545561205, + "grad_norm": 150.29510772471883, + "learning_rate": 5e-05, + "loss": 0.0856, + "num_input_tokens_seen": 133455268, + "step": 1376 + }, + { + "epoch": 0.13453265545561205, + "loss": 0.06062003970146179, + "loss_ce": 0.003033366985619068, + "loss_iou": 0.25, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 133455268, + "step": 1376 + }, + { + "epoch": 0.1346304262807978, + "grad_norm": 6.180281358723589, + "learning_rate": 5e-05, + "loss": 0.0934, + "num_input_tokens_seen": 133552372, + "step": 1377 + }, + { + "epoch": 0.1346304262807978, + "loss": 0.11482622474431992, + "loss_ce": 0.005748765543103218, + "loss_iou": 0.2890625, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 133552372, + "step": 1377 + }, + { + "epoch": 0.13472819710598358, + "grad_norm": 1.5467266128492498, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 133649476, + "step": 1378 + }, + { + "epoch": 0.13472819710598358, + "loss": 0.07192214578390121, + "loss_ce": 0.005515896715223789, + "loss_iou": 0.291015625, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 133649476, + "step": 1378 + }, + { + "epoch": 0.13482596793116933, + "grad_norm": 3.08641196583383, + "learning_rate": 5e-05, + "loss": 0.1065, + "num_input_tokens_seen": 133747524, + "step": 1379 + }, + { + "epoch": 0.13482596793116933, + "loss": 0.08558705449104309, + "loss_ce": 0.004318745341151953, + "loss_iou": 0.39453125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 133747524, + "step": 1379 + }, + { + "epoch": 0.1349237387563551, + "grad_norm": 28.619707433670193, + "learning_rate": 5e-05, + "loss": 0.1148, + "num_input_tokens_seen": 133844796, + "step": 1380 + }, + { + "epoch": 0.1349237387563551, + "loss": 0.14090919494628906, + "loss_ce": 0.00632668100297451, + "loss_iou": 0.419921875, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 133844796, + "step": 1380 + }, + { + "epoch": 0.13502150958154086, + "grad_norm": 11.077667568352638, + "learning_rate": 5e-05, + "loss": 0.1128, + "num_input_tokens_seen": 133941876, + "step": 1381 + }, + { + "epoch": 0.13502150958154086, + "loss": 0.14575178921222687, + "loss_ce": 0.005981265567243099, + "loss_iou": 0.310546875, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 133941876, + "step": 1381 + }, + { + "epoch": 0.13511928040672663, + "grad_norm": 7.62396597451706, + "learning_rate": 5e-05, + "loss": 0.1154, + "num_input_tokens_seen": 134038956, + "step": 1382 + }, + { + "epoch": 0.13511928040672663, + "loss": 0.13912279903888702, + "loss_ce": 0.0059593431651592255, + "loss_iou": 0.41796875, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 134038956, + "step": 1382 + }, + { + "epoch": 0.1352170512319124, + "grad_norm": 34.62382677759937, + "learning_rate": 5e-05, + "loss": 0.1664, + "num_input_tokens_seen": 134135592, + "step": 1383 + }, + { + "epoch": 0.1352170512319124, + "loss": 0.16016963124275208, + "loss_ce": 0.00996212288737297, + "loss_iou": 0.58203125, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 134135592, + "step": 1383 + }, + { + "epoch": 0.13531482205709816, + "grad_norm": 13.297309105179835, + "learning_rate": 5e-05, + "loss": 0.1528, + "num_input_tokens_seen": 134232444, + "step": 1384 + }, + { + "epoch": 0.13531482205709816, + "loss": 0.14078831672668457, + "loss_ce": 0.006083735264837742, + "loss_iou": 0.36328125, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 134232444, + "step": 1384 + }, + { + "epoch": 0.13541259288228394, + "grad_norm": 9.338096070590788, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 134330256, + "step": 1385 + }, + { + "epoch": 0.13541259288228394, + "loss": 0.07735669612884521, + "loss_ce": 0.004831670317798853, + "loss_iou": 0.412109375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 134330256, + "step": 1385 + }, + { + "epoch": 0.1355103637074697, + "grad_norm": 13.231698966111166, + "learning_rate": 5e-05, + "loss": 0.1295, + "num_input_tokens_seen": 134426852, + "step": 1386 + }, + { + "epoch": 0.1355103637074697, + "loss": 0.10793363302946091, + "loss_ce": 0.005455606617033482, + "loss_iou": 0.34375, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 134426852, + "step": 1386 + }, + { + "epoch": 0.13560813453265547, + "grad_norm": 5.583372375332441, + "learning_rate": 5e-05, + "loss": 0.0976, + "num_input_tokens_seen": 134522756, + "step": 1387 + }, + { + "epoch": 0.13560813453265547, + "loss": 0.10063745081424713, + "loss_ce": 0.00304222758859396, + "loss_iou": 0.2294921875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 134522756, + "step": 1387 + }, + { + "epoch": 0.13570590535784122, + "grad_norm": 18.400861161643036, + "learning_rate": 5e-05, + "loss": 0.1608, + "num_input_tokens_seen": 134618872, + "step": 1388 + }, + { + "epoch": 0.13570590535784122, + "loss": 0.16063088178634644, + "loss_ce": 0.008287133648991585, + "loss_iou": 0.41015625, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 134618872, + "step": 1388 + }, + { + "epoch": 0.135803676183027, + "grad_norm": 11.290488118143156, + "learning_rate": 5e-05, + "loss": 0.1079, + "num_input_tokens_seen": 134715612, + "step": 1389 + }, + { + "epoch": 0.135803676183027, + "loss": 0.1311478316783905, + "loss_ce": 0.0033859952818602324, + "loss_iou": 0.34375, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 134715612, + "step": 1389 + }, + { + "epoch": 0.13590144700821274, + "grad_norm": 21.70724100474708, + "learning_rate": 5e-05, + "loss": 0.1408, + "num_input_tokens_seen": 134812440, + "step": 1390 + }, + { + "epoch": 0.13590144700821274, + "loss": 0.14921285212039948, + "loss_ce": 0.009655958041548729, + "loss_iou": 0.421875, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 134812440, + "step": 1390 + }, + { + "epoch": 0.13599921783339852, + "grad_norm": 12.534453995423087, + "learning_rate": 5e-05, + "loss": 0.1231, + "num_input_tokens_seen": 134908968, + "step": 1391 + }, + { + "epoch": 0.13599921783339852, + "loss": 0.11755844205617905, + "loss_ce": 0.008397068828344345, + "loss_iou": 0.36328125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 134908968, + "step": 1391 + }, + { + "epoch": 0.13609698865858427, + "grad_norm": 3.6188748050673265, + "learning_rate": 5e-05, + "loss": 0.1297, + "num_input_tokens_seen": 135006020, + "step": 1392 + }, + { + "epoch": 0.13609698865858427, + "loss": 0.132242351770401, + "loss_ce": 0.008096844889223576, + "loss_iou": 0.318359375, + "loss_num": 0.02490234375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 135006020, + "step": 1392 + }, + { + "epoch": 0.13619475948377005, + "grad_norm": 16.890533730193077, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 135102772, + "step": 1393 + }, + { + "epoch": 0.13619475948377005, + "loss": 0.08286978304386139, + "loss_ce": 0.004958411678671837, + "loss_iou": 0.408203125, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 135102772, + "step": 1393 + }, + { + "epoch": 0.1362925303089558, + "grad_norm": 19.637740518143023, + "learning_rate": 5e-05, + "loss": 0.102, + "num_input_tokens_seen": 135200468, + "step": 1394 + }, + { + "epoch": 0.1362925303089558, + "loss": 0.10616365820169449, + "loss_ce": 0.004341752268373966, + "loss_iou": 0.373046875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 135200468, + "step": 1394 + }, + { + "epoch": 0.13639030113414158, + "grad_norm": 5.185558232948904, + "learning_rate": 5e-05, + "loss": 0.0908, + "num_input_tokens_seen": 135297204, + "step": 1395 + }, + { + "epoch": 0.13639030113414158, + "loss": 0.08435289561748505, + "loss_ce": 0.01120225340127945, + "loss_iou": 0.298828125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 135297204, + "step": 1395 + }, + { + "epoch": 0.13648807195932733, + "grad_norm": 6.236536393694745, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 135394788, + "step": 1396 + }, + { + "epoch": 0.13648807195932733, + "loss": 0.09853868186473846, + "loss_ce": 0.005826280452311039, + "loss_iou": 0.423828125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 135394788, + "step": 1396 + }, + { + "epoch": 0.1365858427845131, + "grad_norm": 17.51407857266078, + "learning_rate": 5e-05, + "loss": 0.1117, + "num_input_tokens_seen": 135492264, + "step": 1397 + }, + { + "epoch": 0.1365858427845131, + "loss": 0.12766145169734955, + "loss_ce": 0.0050418199971318245, + "loss_iou": 0.38671875, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 135492264, + "step": 1397 + }, + { + "epoch": 0.13668361360969886, + "grad_norm": 37.411995886445695, + "learning_rate": 5e-05, + "loss": 0.1054, + "num_input_tokens_seen": 135588460, + "step": 1398 + }, + { + "epoch": 0.13668361360969886, + "loss": 0.10545753687620163, + "loss_ce": 0.010269397869706154, + "loss_iou": 0.357421875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 135588460, + "step": 1398 + }, + { + "epoch": 0.13678138443488463, + "grad_norm": 22.246284217589583, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 135685812, + "step": 1399 + }, + { + "epoch": 0.13678138443488463, + "loss": 0.08296480774879456, + "loss_ce": 0.006869226694107056, + "loss_iou": 0.30078125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 135685812, + "step": 1399 + }, + { + "epoch": 0.13687915526007038, + "grad_norm": 6.545280253298447, + "learning_rate": 5e-05, + "loss": 0.1297, + "num_input_tokens_seen": 135782576, + "step": 1400 + }, + { + "epoch": 0.13687915526007038, + "loss": 0.11090975999832153, + "loss_ce": 0.0028317582327872515, + "loss_iou": 0.3671875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 135782576, + "step": 1400 + }, + { + "epoch": 0.13697692608525616, + "grad_norm": 19.35540782171642, + "learning_rate": 5e-05, + "loss": 0.1504, + "num_input_tokens_seen": 135880744, + "step": 1401 + }, + { + "epoch": 0.13697692608525616, + "loss": 0.19762033224105835, + "loss_ce": 0.007129611447453499, + "loss_iou": 0.384765625, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 135880744, + "step": 1401 + }, + { + "epoch": 0.1370746969104419, + "grad_norm": 16.49550313013799, + "learning_rate": 5e-05, + "loss": 0.1328, + "num_input_tokens_seen": 135977752, + "step": 1402 + }, + { + "epoch": 0.1370746969104419, + "loss": 0.1863432675600052, + "loss_ce": 0.004458509851247072, + "loss_iou": 0.484375, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 135977752, + "step": 1402 + }, + { + "epoch": 0.1371724677356277, + "grad_norm": 7.941236185278319, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 136074084, + "step": 1403 + }, + { + "epoch": 0.1371724677356277, + "loss": 0.11231530457735062, + "loss_ce": 0.0064803436398506165, + "loss_iou": 0.462890625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 136074084, + "step": 1403 + }, + { + "epoch": 0.13727023856081344, + "grad_norm": 8.324646781556838, + "learning_rate": 5e-05, + "loss": 0.1141, + "num_input_tokens_seen": 136171320, + "step": 1404 + }, + { + "epoch": 0.13727023856081344, + "loss": 0.1311764121055603, + "loss_ce": 0.0037960405461490154, + "loss_iou": 0.37890625, + "loss_num": 0.0255126953125, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 136171320, + "step": 1404 + }, + { + "epoch": 0.13736800938599922, + "grad_norm": 6.724790668093414, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 136268184, + "step": 1405 + }, + { + "epoch": 0.13736800938599922, + "loss": 0.06904043257236481, + "loss_ce": 0.005548612214624882, + "loss_iou": 0.23046875, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 136268184, + "step": 1405 + }, + { + "epoch": 0.137465780211185, + "grad_norm": 8.197943452229977, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 136366040, + "step": 1406 + }, + { + "epoch": 0.137465780211185, + "loss": 0.06357693672180176, + "loss_ce": 0.0031826524063944817, + "loss_iou": 0.373046875, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 136366040, + "step": 1406 + }, + { + "epoch": 0.13756355103637075, + "grad_norm": 29.860343061500142, + "learning_rate": 5e-05, + "loss": 0.1048, + "num_input_tokens_seen": 136463932, + "step": 1407 + }, + { + "epoch": 0.13756355103637075, + "loss": 0.09586332738399506, + "loss_ce": 0.007377609610557556, + "loss_iou": 0.4140625, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 136463932, + "step": 1407 + }, + { + "epoch": 0.13766132186155652, + "grad_norm": 32.0740243307553, + "learning_rate": 5e-05, + "loss": 0.1125, + "num_input_tokens_seen": 136560760, + "step": 1408 + }, + { + "epoch": 0.13766132186155652, + "loss": 0.11300699412822723, + "loss_ce": 0.004913734272122383, + "loss_iou": 0.408203125, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 136560760, + "step": 1408 + }, + { + "epoch": 0.13775909268674227, + "grad_norm": 4.722355163735575, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 136656468, + "step": 1409 + }, + { + "epoch": 0.13775909268674227, + "loss": 0.08447502553462982, + "loss_ce": 0.0075554680079221725, + "loss_iou": 0.2890625, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 136656468, + "step": 1409 + }, + { + "epoch": 0.13785686351192805, + "grad_norm": 6.119257950006785, + "learning_rate": 5e-05, + "loss": 0.1066, + "num_input_tokens_seen": 136753312, + "step": 1410 + }, + { + "epoch": 0.13785686351192805, + "loss": 0.10524095594882965, + "loss_ce": 0.005250115878880024, + "loss_iou": 0.326171875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 136753312, + "step": 1410 + }, + { + "epoch": 0.1379546343371138, + "grad_norm": 37.908903595217204, + "learning_rate": 5e-05, + "loss": 0.1082, + "num_input_tokens_seen": 136848980, + "step": 1411 + }, + { + "epoch": 0.1379546343371138, + "loss": 0.10193216800689697, + "loss_ce": 0.005420329049229622, + "loss_iou": 0.232421875, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 136848980, + "step": 1411 + }, + { + "epoch": 0.13805240516229958, + "grad_norm": 26.184445918467272, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 136946000, + "step": 1412 + }, + { + "epoch": 0.13805240516229958, + "loss": 0.0987718477845192, + "loss_ce": 0.004731928929686546, + "loss_iou": 0.330078125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 136946000, + "step": 1412 + }, + { + "epoch": 0.13815017598748533, + "grad_norm": 4.128937643416616, + "learning_rate": 5e-05, + "loss": 0.0773, + "num_input_tokens_seen": 137042588, + "step": 1413 + }, + { + "epoch": 0.13815017598748533, + "loss": 0.07434109598398209, + "loss_ce": 0.0014880052767693996, + "loss_iou": 0.34765625, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 137042588, + "step": 1413 + }, + { + "epoch": 0.1382479468126711, + "grad_norm": 25.453545099146908, + "learning_rate": 5e-05, + "loss": 0.216, + "num_input_tokens_seen": 137138860, + "step": 1414 + }, + { + "epoch": 0.1382479468126711, + "loss": 0.2050532102584839, + "loss_ce": 0.005376705899834633, + "loss_iou": 0.3515625, + "loss_num": 0.0400390625, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 137138860, + "step": 1414 + }, + { + "epoch": 0.13834571763785686, + "grad_norm": 18.042251006416215, + "learning_rate": 5e-05, + "loss": 0.1105, + "num_input_tokens_seen": 137236200, + "step": 1415 + }, + { + "epoch": 0.13834571763785686, + "loss": 0.10873475670814514, + "loss_ce": 0.009033835493028164, + "loss_iou": 0.287109375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 137236200, + "step": 1415 + }, + { + "epoch": 0.13844348846304264, + "grad_norm": 15.259449980554402, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 137332872, + "step": 1416 + }, + { + "epoch": 0.13844348846304264, + "loss": 0.12926962971687317, + "loss_ce": 0.010373142547905445, + "loss_iou": 0.400390625, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 137332872, + "step": 1416 + }, + { + "epoch": 0.13854125928822839, + "grad_norm": 26.028427445293907, + "learning_rate": 5e-05, + "loss": 0.1496, + "num_input_tokens_seen": 137429740, + "step": 1417 + }, + { + "epoch": 0.13854125928822839, + "loss": 0.15217895805835724, + "loss_ce": 0.0064880456775426865, + "loss_iou": 0.423828125, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 137429740, + "step": 1417 + }, + { + "epoch": 0.13863903011341416, + "grad_norm": 21.80118443018584, + "learning_rate": 5e-05, + "loss": 0.1029, + "num_input_tokens_seen": 137526292, + "step": 1418 + }, + { + "epoch": 0.13863903011341416, + "loss": 0.13172157108783722, + "loss_ce": 0.0033341203816235065, + "loss_iou": 0.412109375, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 137526292, + "step": 1418 + }, + { + "epoch": 0.1387368009385999, + "grad_norm": 6.178025825934386, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 137622112, + "step": 1419 + }, + { + "epoch": 0.1387368009385999, + "loss": 0.06775184720754623, + "loss_ce": 0.006472554989159107, + "loss_iou": 0.32421875, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 137622112, + "step": 1419 + }, + { + "epoch": 0.1388345717637857, + "grad_norm": 23.906297570090324, + "learning_rate": 5e-05, + "loss": 0.124, + "num_input_tokens_seen": 137720012, + "step": 1420 + }, + { + "epoch": 0.1388345717637857, + "loss": 0.11240413039922714, + "loss_ce": 0.006630204617977142, + "loss_iou": 0.29296875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 137720012, + "step": 1420 + }, + { + "epoch": 0.13893234258897144, + "grad_norm": 11.439825318394721, + "learning_rate": 5e-05, + "loss": 0.1151, + "num_input_tokens_seen": 137817080, + "step": 1421 + }, + { + "epoch": 0.13893234258897144, + "loss": 0.10866761207580566, + "loss_ce": 0.005945439450442791, + "loss_iou": 0.357421875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 137817080, + "step": 1421 + }, + { + "epoch": 0.13903011341415722, + "grad_norm": 30.999347661943084, + "learning_rate": 5e-05, + "loss": 0.1474, + "num_input_tokens_seen": 137913868, + "step": 1422 + }, + { + "epoch": 0.13903011341415722, + "loss": 0.18237780034542084, + "loss_ce": 0.013737669214606285, + "loss_iou": 0.33984375, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 137913868, + "step": 1422 + }, + { + "epoch": 0.13912788423934297, + "grad_norm": 14.545843930231179, + "learning_rate": 5e-05, + "loss": 0.1261, + "num_input_tokens_seen": 138011180, + "step": 1423 + }, + { + "epoch": 0.13912788423934297, + "loss": 0.13886964321136475, + "loss_ce": 0.004104019142687321, + "loss_iou": 0.3515625, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 138011180, + "step": 1423 + }, + { + "epoch": 0.13922565506452875, + "grad_norm": 12.309597565401189, + "learning_rate": 5e-05, + "loss": 0.1031, + "num_input_tokens_seen": 138107740, + "step": 1424 + }, + { + "epoch": 0.13922565506452875, + "loss": 0.11815787106752396, + "loss_ce": 0.005944736301898956, + "loss_iou": 0.369140625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 138107740, + "step": 1424 + }, + { + "epoch": 0.1393234258897145, + "grad_norm": 14.373182505238054, + "learning_rate": 5e-05, + "loss": 0.1381, + "num_input_tokens_seen": 138205360, + "step": 1425 + }, + { + "epoch": 0.1393234258897145, + "loss": 0.07588405907154083, + "loss_ce": 0.0024129925295710564, + "loss_iou": 0.408203125, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 138205360, + "step": 1425 + }, + { + "epoch": 0.13942119671490028, + "grad_norm": 16.377314270797555, + "learning_rate": 5e-05, + "loss": 0.0981, + "num_input_tokens_seen": 138302320, + "step": 1426 + }, + { + "epoch": 0.13942119671490028, + "loss": 0.07347767800092697, + "loss_ce": 0.004836012609302998, + "loss_iou": 0.287109375, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 138302320, + "step": 1426 + }, + { + "epoch": 0.13951896754008603, + "grad_norm": 9.171366275473556, + "learning_rate": 5e-05, + "loss": 0.0906, + "num_input_tokens_seen": 138399052, + "step": 1427 + }, + { + "epoch": 0.13951896754008603, + "loss": 0.11676532030105591, + "loss_ce": 0.007710754871368408, + "loss_iou": 0.30859375, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 138399052, + "step": 1427 + }, + { + "epoch": 0.1396167383652718, + "grad_norm": 15.313298078357107, + "learning_rate": 5e-05, + "loss": 0.1112, + "num_input_tokens_seen": 138496612, + "step": 1428 + }, + { + "epoch": 0.1396167383652718, + "loss": 0.13510459661483765, + "loss_ce": 0.005557470954954624, + "loss_iou": 0.34375, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 138496612, + "step": 1428 + }, + { + "epoch": 0.13971450919045758, + "grad_norm": 17.441864034206475, + "learning_rate": 5e-05, + "loss": 0.1143, + "num_input_tokens_seen": 138592872, + "step": 1429 + }, + { + "epoch": 0.13971450919045758, + "loss": 0.11639875173568726, + "loss_ce": 0.003239573910832405, + "loss_iou": 0.341796875, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 138592872, + "step": 1429 + }, + { + "epoch": 0.13981228001564333, + "grad_norm": 9.019149643416323, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 138688704, + "step": 1430 + }, + { + "epoch": 0.13981228001564333, + "loss": 0.08954715728759766, + "loss_ce": 0.005425452254712582, + "loss_iou": 0.2890625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 138688704, + "step": 1430 + }, + { + "epoch": 0.1399100508408291, + "grad_norm": 12.791872136147392, + "learning_rate": 5e-05, + "loss": 0.0971, + "num_input_tokens_seen": 138785904, + "step": 1431 + }, + { + "epoch": 0.1399100508408291, + "loss": 0.09442583471536636, + "loss_ce": 0.003468187525868416, + "loss_iou": 0.45703125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 138785904, + "step": 1431 + }, + { + "epoch": 0.14000782166601486, + "grad_norm": 9.467911283668688, + "learning_rate": 5e-05, + "loss": 0.1231, + "num_input_tokens_seen": 138882260, + "step": 1432 + }, + { + "epoch": 0.14000782166601486, + "loss": 0.1488497406244278, + "loss_ce": 0.009628545492887497, + "loss_iou": 0.36328125, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 138882260, + "step": 1432 + }, + { + "epoch": 0.14010559249120064, + "grad_norm": 26.218081093416796, + "learning_rate": 5e-05, + "loss": 0.1062, + "num_input_tokens_seen": 138979088, + "step": 1433 + }, + { + "epoch": 0.14010559249120064, + "loss": 0.11954790353775024, + "loss_ce": 0.005045954138040543, + "loss_iou": 0.4296875, + "loss_num": 0.02294921875, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 138979088, + "step": 1433 + }, + { + "epoch": 0.1402033633163864, + "grad_norm": 26.482819557842422, + "learning_rate": 5e-05, + "loss": 0.1371, + "num_input_tokens_seen": 139075628, + "step": 1434 + }, + { + "epoch": 0.1402033633163864, + "loss": 0.09611000865697861, + "loss_ce": 0.007578512188047171, + "loss_iou": 0.396484375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 139075628, + "step": 1434 + }, + { + "epoch": 0.14030113414157216, + "grad_norm": 18.9555249514525, + "learning_rate": 5e-05, + "loss": 0.1162, + "num_input_tokens_seen": 139173848, + "step": 1435 + }, + { + "epoch": 0.14030113414157216, + "loss": 0.09325563907623291, + "loss_ce": 0.006112698465585709, + "loss_iou": 0.37109375, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 139173848, + "step": 1435 + }, + { + "epoch": 0.14039890496675791, + "grad_norm": 10.64917661770384, + "learning_rate": 5e-05, + "loss": 0.0879, + "num_input_tokens_seen": 139270388, + "step": 1436 + }, + { + "epoch": 0.14039890496675791, + "loss": 0.08778989315032959, + "loss_ce": 0.005957007873803377, + "loss_iou": 0.3046875, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 139270388, + "step": 1436 + }, + { + "epoch": 0.1404966757919437, + "grad_norm": 11.136161265215613, + "learning_rate": 5e-05, + "loss": 0.0978, + "num_input_tokens_seen": 139367700, + "step": 1437 + }, + { + "epoch": 0.1404966757919437, + "loss": 0.07493533939123154, + "loss_ce": 0.003005408914759755, + "loss_iou": 0.365234375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 139367700, + "step": 1437 + }, + { + "epoch": 0.14059444661712944, + "grad_norm": 17.362350421650614, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 139464648, + "step": 1438 + }, + { + "epoch": 0.14059444661712944, + "loss": 0.0835217535495758, + "loss_ce": 0.007384210824966431, + "loss_iou": 0.28125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 139464648, + "step": 1438 + }, + { + "epoch": 0.14069221744231522, + "grad_norm": 6.348626837888998, + "learning_rate": 5e-05, + "loss": 0.1236, + "num_input_tokens_seen": 139561424, + "step": 1439 + }, + { + "epoch": 0.14069221744231522, + "loss": 0.12510329484939575, + "loss_ce": 0.003567043226212263, + "loss_iou": 0.3984375, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 139561424, + "step": 1439 + }, + { + "epoch": 0.14078998826750097, + "grad_norm": 16.798808039891945, + "learning_rate": 5e-05, + "loss": 0.136, + "num_input_tokens_seen": 139658364, + "step": 1440 + }, + { + "epoch": 0.14078998826750097, + "loss": 0.15227510035037994, + "loss_ce": 0.008842477574944496, + "loss_iou": 0.322265625, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 139658364, + "step": 1440 + }, + { + "epoch": 0.14088775909268675, + "grad_norm": 13.109396551388526, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 139755836, + "step": 1441 + }, + { + "epoch": 0.14088775909268675, + "loss": 0.10660892724990845, + "loss_ce": 0.006206092424690723, + "loss_iou": 0.3984375, + "loss_num": 0.02001953125, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 139755836, + "step": 1441 + }, + { + "epoch": 0.1409855299178725, + "grad_norm": 10.453179519562402, + "learning_rate": 5e-05, + "loss": 0.1711, + "num_input_tokens_seen": 139852404, + "step": 1442 + }, + { + "epoch": 0.1409855299178725, + "loss": 0.19374476373195648, + "loss_ce": 0.007282366044819355, + "loss_iou": 0.310546875, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 139852404, + "step": 1442 + }, + { + "epoch": 0.14108330074305828, + "grad_norm": 20.482561765284288, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 139949420, + "step": 1443 + }, + { + "epoch": 0.14108330074305828, + "loss": 0.115956611931324, + "loss_ce": 0.005757633130997419, + "loss_iou": 0.41796875, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 139949420, + "step": 1443 + }, + { + "epoch": 0.14118107156824403, + "grad_norm": 6.357513302057685, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 140045368, + "step": 1444 + }, + { + "epoch": 0.14118107156824403, + "loss": 0.06401663273572922, + "loss_ce": 0.0041411384008824825, + "loss_iou": 0.20703125, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 140045368, + "step": 1444 + }, + { + "epoch": 0.1412788423934298, + "grad_norm": 4.3063754410121895, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 140142796, + "step": 1445 + }, + { + "epoch": 0.1412788423934298, + "loss": 0.057904407382011414, + "loss_ce": 0.0035220813006162643, + "loss_iou": 0.380859375, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 140142796, + "step": 1445 + }, + { + "epoch": 0.14137661321861555, + "grad_norm": 6.121172637317192, + "learning_rate": 5e-05, + "loss": 0.099, + "num_input_tokens_seen": 140239000, + "step": 1446 + }, + { + "epoch": 0.14137661321861555, + "loss": 0.0839822068810463, + "loss_ce": 0.0066048866137862206, + "loss_iou": 0.16796875, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 140239000, + "step": 1446 + }, + { + "epoch": 0.14147438404380133, + "grad_norm": 13.726583266171108, + "learning_rate": 5e-05, + "loss": 0.1218, + "num_input_tokens_seen": 140335360, + "step": 1447 + }, + { + "epoch": 0.14147438404380133, + "loss": 0.09280428290367126, + "loss_ce": 0.003761620493605733, + "loss_iou": 0.24609375, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 140335360, + "step": 1447 + }, + { + "epoch": 0.14157215486898708, + "grad_norm": 19.650317145873117, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 140432124, + "step": 1448 + }, + { + "epoch": 0.14157215486898708, + "loss": 0.0913771241903305, + "loss_ce": 0.0076063768938183784, + "loss_iou": 0.31640625, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 140432124, + "step": 1448 + }, + { + "epoch": 0.14166992569417286, + "grad_norm": 10.754616342428035, + "learning_rate": 5e-05, + "loss": 0.1176, + "num_input_tokens_seen": 140528412, + "step": 1449 + }, + { + "epoch": 0.14166992569417286, + "loss": 0.10866285115480423, + "loss_ce": 0.006398449186235666, + "loss_iou": 0.46875, + "loss_num": 0.0205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 140528412, + "step": 1449 + }, + { + "epoch": 0.1417676965193586, + "grad_norm": 3.3373290213646567, + "learning_rate": 5e-05, + "loss": 0.1029, + "num_input_tokens_seen": 140624960, + "step": 1450 + }, + { + "epoch": 0.1417676965193586, + "loss": 0.06138906627893448, + "loss_ce": 0.0012084046611562371, + "loss_iou": 0.380859375, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 140624960, + "step": 1450 + }, + { + "epoch": 0.1418654673445444, + "grad_norm": 6.0875239432905355, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 140721344, + "step": 1451 + }, + { + "epoch": 0.1418654673445444, + "loss": 0.10559876263141632, + "loss_ce": 0.00347168673761189, + "loss_iou": 0.3046875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 140721344, + "step": 1451 + }, + { + "epoch": 0.14196323816973017, + "grad_norm": 9.80709690406183, + "learning_rate": 5e-05, + "loss": 0.0998, + "num_input_tokens_seen": 140818552, + "step": 1452 + }, + { + "epoch": 0.14196323816973017, + "loss": 0.08297420293092728, + "loss_ce": 0.007855185307562351, + "loss_iou": 0.3359375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 140818552, + "step": 1452 + }, + { + "epoch": 0.14206100899491592, + "grad_norm": 13.579731788797028, + "learning_rate": 5e-05, + "loss": 0.1163, + "num_input_tokens_seen": 140914884, + "step": 1453 + }, + { + "epoch": 0.14206100899491592, + "loss": 0.13030536472797394, + "loss_ce": 0.0038710401859134436, + "loss_iou": 0.380859375, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 140914884, + "step": 1453 + }, + { + "epoch": 0.1421587798201017, + "grad_norm": 21.78494086053356, + "learning_rate": 5e-05, + "loss": 0.1078, + "num_input_tokens_seen": 141011476, + "step": 1454 + }, + { + "epoch": 0.1421587798201017, + "loss": 0.07768745720386505, + "loss_ce": 0.003911216743290424, + "loss_iou": 0.341796875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 141011476, + "step": 1454 + }, + { + "epoch": 0.14225655064528744, + "grad_norm": 14.111718696174696, + "learning_rate": 5e-05, + "loss": 0.1218, + "num_input_tokens_seen": 141108108, + "step": 1455 + }, + { + "epoch": 0.14225655064528744, + "loss": 0.09866629540920258, + "loss_ce": 0.007632357068359852, + "loss_iou": 0.3046875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 141108108, + "step": 1455 + }, + { + "epoch": 0.14235432147047322, + "grad_norm": 7.779996643532491, + "learning_rate": 5e-05, + "loss": 0.1204, + "num_input_tokens_seen": 141205112, + "step": 1456 + }, + { + "epoch": 0.14235432147047322, + "loss": 0.16541370749473572, + "loss_ce": 0.0031517441384494305, + "loss_iou": 0.455078125, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 141205112, + "step": 1456 + }, + { + "epoch": 0.14245209229565897, + "grad_norm": 13.720331239997048, + "learning_rate": 5e-05, + "loss": 0.1275, + "num_input_tokens_seen": 141302304, + "step": 1457 + }, + { + "epoch": 0.14245209229565897, + "loss": 0.0895206481218338, + "loss_ce": 0.006381222512573004, + "loss_iou": 0.337890625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 141302304, + "step": 1457 + }, + { + "epoch": 0.14254986312084475, + "grad_norm": 15.615544314034333, + "learning_rate": 5e-05, + "loss": 0.1078, + "num_input_tokens_seen": 141399388, + "step": 1458 + }, + { + "epoch": 0.14254986312084475, + "loss": 0.12196949869394302, + "loss_ce": 0.008077897131443024, + "loss_iou": 0.40625, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 141399388, + "step": 1458 + }, + { + "epoch": 0.1426476339460305, + "grad_norm": 5.799571876773489, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 141496424, + "step": 1459 + }, + { + "epoch": 0.1426476339460305, + "loss": 0.09866783767938614, + "loss_ce": 0.00476524792611599, + "loss_iou": 0.390625, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 141496424, + "step": 1459 + }, + { + "epoch": 0.14274540477121628, + "grad_norm": 3.663182813278226, + "learning_rate": 5e-05, + "loss": 0.1356, + "num_input_tokens_seen": 141593700, + "step": 1460 + }, + { + "epoch": 0.14274540477121628, + "loss": 0.11394106596708298, + "loss_ce": 0.0074652377516031265, + "loss_iou": 0.423828125, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 141593700, + "step": 1460 + }, + { + "epoch": 0.14284317559640203, + "grad_norm": 19.893983078756047, + "learning_rate": 5e-05, + "loss": 0.1252, + "num_input_tokens_seen": 141691012, + "step": 1461 + }, + { + "epoch": 0.14284317559640203, + "loss": 0.12237538397312164, + "loss_ce": 0.004180805291980505, + "loss_iou": 0.462890625, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 141691012, + "step": 1461 + }, + { + "epoch": 0.1429409464215878, + "grad_norm": 35.84610363686477, + "learning_rate": 5e-05, + "loss": 0.1095, + "num_input_tokens_seen": 141787944, + "step": 1462 + }, + { + "epoch": 0.1429409464215878, + "loss": 0.12215497344732285, + "loss_ce": 0.005669377278536558, + "loss_iou": 0.51953125, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 141787944, + "step": 1462 + }, + { + "epoch": 0.14303871724677356, + "grad_norm": 22.487287266515846, + "learning_rate": 5e-05, + "loss": 0.1015, + "num_input_tokens_seen": 141884364, + "step": 1463 + }, + { + "epoch": 0.14303871724677356, + "loss": 0.10639827698469162, + "loss_ce": 0.006498983129858971, + "loss_iou": 0.310546875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 141884364, + "step": 1463 + }, + { + "epoch": 0.14313648807195933, + "grad_norm": 4.2767264309792505, + "learning_rate": 5e-05, + "loss": 0.0846, + "num_input_tokens_seen": 141981568, + "step": 1464 + }, + { + "epoch": 0.14313648807195933, + "loss": 0.0835576206445694, + "loss_ce": 0.001190672628581524, + "loss_iou": 0.31640625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 141981568, + "step": 1464 + }, + { + "epoch": 0.14323425889714508, + "grad_norm": 22.743430773697508, + "learning_rate": 5e-05, + "loss": 0.1108, + "num_input_tokens_seen": 142079264, + "step": 1465 + }, + { + "epoch": 0.14323425889714508, + "loss": 0.08074122667312622, + "loss_ce": 0.007209125906229019, + "loss_iou": 0.3203125, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 142079264, + "step": 1465 + }, + { + "epoch": 0.14333202972233086, + "grad_norm": 16.08291885997422, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 142175112, + "step": 1466 + }, + { + "epoch": 0.14333202972233086, + "loss": 0.07403101027011871, + "loss_ce": 0.005000250414013863, + "loss_iou": 0.251953125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 142175112, + "step": 1466 + }, + { + "epoch": 0.1434298005475166, + "grad_norm": 23.508417378431275, + "learning_rate": 5e-05, + "loss": 0.1372, + "num_input_tokens_seen": 142272984, + "step": 1467 + }, + { + "epoch": 0.1434298005475166, + "loss": 0.13885530829429626, + "loss_ce": 0.00674470653757453, + "loss_iou": 0.478515625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 142272984, + "step": 1467 + }, + { + "epoch": 0.1435275713727024, + "grad_norm": 30.599687849164233, + "learning_rate": 5e-05, + "loss": 0.1161, + "num_input_tokens_seen": 142369684, + "step": 1468 + }, + { + "epoch": 0.1435275713727024, + "loss": 0.10469596832990646, + "loss_ce": 0.005666426382958889, + "loss_iou": 0.35546875, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 142369684, + "step": 1468 + }, + { + "epoch": 0.14362534219788814, + "grad_norm": 13.56245612925449, + "learning_rate": 5e-05, + "loss": 0.1945, + "num_input_tokens_seen": 142466648, + "step": 1469 + }, + { + "epoch": 0.14362534219788814, + "loss": 0.18695016205310822, + "loss_ce": 0.008849581703543663, + "loss_iou": 0.341796875, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 142466648, + "step": 1469 + }, + { + "epoch": 0.14372311302307392, + "grad_norm": 7.194645912935961, + "learning_rate": 5e-05, + "loss": 0.157, + "num_input_tokens_seen": 142563356, + "step": 1470 + }, + { + "epoch": 0.14372311302307392, + "loss": 0.1540966033935547, + "loss_ce": 0.003950109239667654, + "loss_iou": 0.3359375, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 142563356, + "step": 1470 + }, + { + "epoch": 0.14382088384825967, + "grad_norm": 19.432322381304505, + "learning_rate": 5e-05, + "loss": 0.1421, + "num_input_tokens_seen": 142659808, + "step": 1471 + }, + { + "epoch": 0.14382088384825967, + "loss": 0.13827967643737793, + "loss_ce": 0.006382710766047239, + "loss_iou": 0.451171875, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 142659808, + "step": 1471 + }, + { + "epoch": 0.14391865467344545, + "grad_norm": 3.43981201850157, + "learning_rate": 5e-05, + "loss": 0.1232, + "num_input_tokens_seen": 142758048, + "step": 1472 + }, + { + "epoch": 0.14391865467344545, + "loss": 0.1186341717839241, + "loss_ce": 0.006634661927819252, + "loss_iou": 0.48046875, + "loss_num": 0.0224609375, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 142758048, + "step": 1472 + }, + { + "epoch": 0.1440164254986312, + "grad_norm": 7.274686317614159, + "learning_rate": 5e-05, + "loss": 0.1045, + "num_input_tokens_seen": 142856096, + "step": 1473 + }, + { + "epoch": 0.1440164254986312, + "loss": 0.08529848605394363, + "loss_ce": 0.0013751479564234614, + "loss_iou": 0.47265625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 142856096, + "step": 1473 + }, + { + "epoch": 0.14411419632381697, + "grad_norm": 11.83805623211717, + "learning_rate": 5e-05, + "loss": 0.2162, + "num_input_tokens_seen": 142952880, + "step": 1474 + }, + { + "epoch": 0.14411419632381697, + "loss": 0.18261870741844177, + "loss_ce": 0.006471236236393452, + "loss_iou": 0.5078125, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 142952880, + "step": 1474 + }, + { + "epoch": 0.14421196714900275, + "grad_norm": 9.466467108291821, + "learning_rate": 5e-05, + "loss": 0.134, + "num_input_tokens_seen": 143049928, + "step": 1475 + }, + { + "epoch": 0.14421196714900275, + "loss": 0.09099742770195007, + "loss_ce": 0.005670278333127499, + "loss_iou": 0.314453125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 143049928, + "step": 1475 + }, + { + "epoch": 0.1443097379741885, + "grad_norm": 5.467537013668495, + "learning_rate": 5e-05, + "loss": 0.1101, + "num_input_tokens_seen": 143147060, + "step": 1476 + }, + { + "epoch": 0.1443097379741885, + "loss": 0.11311361193656921, + "loss_ce": 0.005111899692565203, + "loss_iou": 0.40234375, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 143147060, + "step": 1476 + }, + { + "epoch": 0.14440750879937428, + "grad_norm": 7.3271356124416105, + "learning_rate": 5e-05, + "loss": 0.1056, + "num_input_tokens_seen": 143245264, + "step": 1477 + }, + { + "epoch": 0.14440750879937428, + "loss": 0.1341674029827118, + "loss_ce": 0.0055663385428488255, + "loss_iou": 0.41796875, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 143245264, + "step": 1477 + }, + { + "epoch": 0.14450527962456003, + "grad_norm": 21.897392277921767, + "learning_rate": 5e-05, + "loss": 0.1342, + "num_input_tokens_seen": 143340356, + "step": 1478 + }, + { + "epoch": 0.14450527962456003, + "loss": 0.14682094752788544, + "loss_ce": 0.004792134277522564, + "loss_iou": 0.330078125, + "loss_num": 0.0284423828125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 143340356, + "step": 1478 + }, + { + "epoch": 0.1446030504497458, + "grad_norm": 15.92870202379428, + "learning_rate": 5e-05, + "loss": 0.1125, + "num_input_tokens_seen": 143437128, + "step": 1479 + }, + { + "epoch": 0.1446030504497458, + "loss": 0.09103211760520935, + "loss_ce": 0.007024857215583324, + "loss_iou": 0.357421875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 143437128, + "step": 1479 + }, + { + "epoch": 0.14470082127493156, + "grad_norm": 10.867847306065125, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 143534200, + "step": 1480 + }, + { + "epoch": 0.14470082127493156, + "loss": 0.08995994925498962, + "loss_ce": 0.0050781709142029285, + "loss_iou": 0.333984375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 143534200, + "step": 1480 + }, + { + "epoch": 0.14479859210011733, + "grad_norm": 4.306714277245803, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 143631380, + "step": 1481 + }, + { + "epoch": 0.14479859210011733, + "loss": 0.10270938277244568, + "loss_ce": 0.002901642583310604, + "loss_iou": 0.400390625, + "loss_num": 0.02001953125, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 143631380, + "step": 1481 + }, + { + "epoch": 0.14489636292530308, + "grad_norm": 9.099438636953822, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 143728856, + "step": 1482 + }, + { + "epoch": 0.14489636292530308, + "loss": 0.09824565798044205, + "loss_ce": 0.0024814975913614035, + "loss_iou": 0.31640625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 143728856, + "step": 1482 + }, + { + "epoch": 0.14499413375048886, + "grad_norm": 9.058590205640781, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 143826036, + "step": 1483 + }, + { + "epoch": 0.14499413375048886, + "loss": 0.06479156017303467, + "loss_ce": 0.0035427820403128862, + "loss_iou": 0.29296875, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 143826036, + "step": 1483 + }, + { + "epoch": 0.1450919045756746, + "grad_norm": 13.075306084103993, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 143922460, + "step": 1484 + }, + { + "epoch": 0.1450919045756746, + "loss": 0.08192352950572968, + "loss_ce": 0.008147284388542175, + "loss_iou": 0.28125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 143922460, + "step": 1484 + }, + { + "epoch": 0.1451896754008604, + "grad_norm": 9.119544441665326, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 144019808, + "step": 1485 + }, + { + "epoch": 0.1451896754008604, + "loss": 0.0821867287158966, + "loss_ce": 0.004427935462445021, + "loss_iou": 0.400390625, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 144019808, + "step": 1485 + }, + { + "epoch": 0.14528744622604614, + "grad_norm": 7.5785058698483825, + "learning_rate": 5e-05, + "loss": 0.1092, + "num_input_tokens_seen": 144116640, + "step": 1486 + }, + { + "epoch": 0.14528744622604614, + "loss": 0.10601159930229187, + "loss_ce": 0.0072261979803442955, + "loss_iou": 0.361328125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 144116640, + "step": 1486 + }, + { + "epoch": 0.14538521705123192, + "grad_norm": 13.027805483649175, + "learning_rate": 5e-05, + "loss": 0.1132, + "num_input_tokens_seen": 144214580, + "step": 1487 + }, + { + "epoch": 0.14538521705123192, + "loss": 0.10911446064710617, + "loss_ce": 0.006117632612586021, + "loss_iou": 0.44140625, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 144214580, + "step": 1487 + }, + { + "epoch": 0.14548298787641767, + "grad_norm": 15.211898636786247, + "learning_rate": 5e-05, + "loss": 0.121, + "num_input_tokens_seen": 144310832, + "step": 1488 + }, + { + "epoch": 0.14548298787641767, + "loss": 0.09738524258136749, + "loss_ce": 0.004581287037581205, + "loss_iou": 0.24609375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 144310832, + "step": 1488 + }, + { + "epoch": 0.14558075870160345, + "grad_norm": 15.354064260088615, + "learning_rate": 5e-05, + "loss": 0.1087, + "num_input_tokens_seen": 144407804, + "step": 1489 + }, + { + "epoch": 0.14558075870160345, + "loss": 0.10330229252576828, + "loss_ce": 0.0019915637094527483, + "loss_iou": 0.37109375, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 144407804, + "step": 1489 + }, + { + "epoch": 0.1456785295267892, + "grad_norm": 3.717813658829543, + "learning_rate": 5e-05, + "loss": 0.1006, + "num_input_tokens_seen": 144504252, + "step": 1490 + }, + { + "epoch": 0.1456785295267892, + "loss": 0.08392414450645447, + "loss_ce": 0.0026253212708979845, + "loss_iou": 0.302734375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 144504252, + "step": 1490 + }, + { + "epoch": 0.14577630035197497, + "grad_norm": 9.175060232778241, + "learning_rate": 5e-05, + "loss": 0.0845, + "num_input_tokens_seen": 144601828, + "step": 1491 + }, + { + "epoch": 0.14577630035197497, + "loss": 0.07641749083995819, + "loss_ce": 0.0032477842178195715, + "loss_iou": 0.31640625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 144601828, + "step": 1491 + }, + { + "epoch": 0.14587407117716072, + "grad_norm": 36.44512780087164, + "learning_rate": 5e-05, + "loss": 0.1055, + "num_input_tokens_seen": 144699200, + "step": 1492 + }, + { + "epoch": 0.14587407117716072, + "loss": 0.1469472050666809, + "loss_ce": 0.00404864689335227, + "loss_iou": 0.3203125, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 144699200, + "step": 1492 + }, + { + "epoch": 0.1459718420023465, + "grad_norm": 13.73154064665317, + "learning_rate": 5e-05, + "loss": 0.1149, + "num_input_tokens_seen": 144797716, + "step": 1493 + }, + { + "epoch": 0.1459718420023465, + "loss": 0.09254966676235199, + "loss_ce": 0.0033925657626241446, + "loss_iou": 0.3671875, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 144797716, + "step": 1493 + }, + { + "epoch": 0.14606961282753225, + "grad_norm": 7.155635138300461, + "learning_rate": 5e-05, + "loss": 0.1003, + "num_input_tokens_seen": 144895336, + "step": 1494 + }, + { + "epoch": 0.14606961282753225, + "loss": 0.09143202006816864, + "loss_ce": 0.00320570170879364, + "loss_iou": 0.443359375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 144895336, + "step": 1494 + }, + { + "epoch": 0.14616738365271803, + "grad_norm": 5.855452245021135, + "learning_rate": 5e-05, + "loss": 0.1104, + "num_input_tokens_seen": 144991812, + "step": 1495 + }, + { + "epoch": 0.14616738365271803, + "loss": 0.09204621613025665, + "loss_ce": 0.0061087170615792274, + "loss_iou": 0.41015625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 144991812, + "step": 1495 + }, + { + "epoch": 0.14626515447790378, + "grad_norm": 8.610399122795627, + "learning_rate": 5e-05, + "loss": 0.1, + "num_input_tokens_seen": 145087980, + "step": 1496 + }, + { + "epoch": 0.14626515447790378, + "loss": 0.09014289081096649, + "loss_ce": 0.007562318351119757, + "loss_iou": 0.287109375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 145087980, + "step": 1496 + }, + { + "epoch": 0.14636292530308956, + "grad_norm": 14.27252618881449, + "learning_rate": 5e-05, + "loss": 0.1157, + "num_input_tokens_seen": 145184900, + "step": 1497 + }, + { + "epoch": 0.14636292530308956, + "loss": 0.08632537722587585, + "loss_ce": 0.005804746877402067, + "loss_iou": 0.333984375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 145184900, + "step": 1497 + }, + { + "epoch": 0.14646069612827534, + "grad_norm": 5.88946216014282, + "learning_rate": 5e-05, + "loss": 0.1217, + "num_input_tokens_seen": 145282156, + "step": 1498 + }, + { + "epoch": 0.14646069612827534, + "loss": 0.10505492240190506, + "loss_ce": 0.00883299857378006, + "loss_iou": 0.306640625, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 145282156, + "step": 1498 + }, + { + "epoch": 0.1465584669534611, + "grad_norm": 14.512631742502958, + "learning_rate": 5e-05, + "loss": 0.1118, + "num_input_tokens_seen": 145379392, + "step": 1499 + }, + { + "epoch": 0.1465584669534611, + "loss": 0.11069786548614502, + "loss_ce": 0.008174054324626923, + "loss_iou": 0.41015625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 145379392, + "step": 1499 + }, + { + "epoch": 0.14665623777864686, + "grad_norm": 14.513620796642904, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 145476476, + "step": 1500 + }, + { + "epoch": 0.14665623777864686, + "eval_seeclick_CIoU": 0.3617057204246521, + "eval_seeclick_GIoU": 0.35515178740024567, + "eval_seeclick_IoU": 0.4314703494310379, + "eval_seeclick_MAE_all": 0.10461810603737831, + "eval_seeclick_MAE_h": 0.0428733192384243, + "eval_seeclick_MAE_w": 0.14825952798128128, + "eval_seeclick_MAE_x": 0.1829281747341156, + "eval_seeclick_MAE_y": 0.044411396607756615, + "eval_seeclick_NUM_probability": 0.9997389316558838, + "eval_seeclick_inside_bbox": 0.7528409063816071, + "eval_seeclick_loss": 0.38579148054122925, + "eval_seeclick_loss_ce": 0.010364445392042398, + "eval_seeclick_loss_iou": 0.4881591796875, + "eval_seeclick_loss_num": 0.07248687744140625, + "eval_seeclick_loss_xval": 0.36248779296875, + "eval_seeclick_runtime": 74.1554, + "eval_seeclick_samples_per_second": 0.58, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 145476476, + "step": 1500 + }, + { + "epoch": 0.14665623777864686, + "eval_icons_CIoU": 0.7176692485809326, + "eval_icons_GIoU": 0.7139537632465363, + "eval_icons_IoU": 0.7323802709579468, + "eval_icons_MAE_all": 0.03931507468223572, + "eval_icons_MAE_h": 0.04175834450870752, + "eval_icons_MAE_w": 0.04090461228042841, + "eval_icons_MAE_x": 0.03842565603554249, + "eval_icons_MAE_y": 0.036171672865748405, + "eval_icons_NUM_probability": 0.9998466372489929, + "eval_icons_inside_bbox": 0.9375, + "eval_icons_loss": 0.15962867438793182, + "eval_icons_loss_ce": 5.4749096307205036e-05, + "eval_icons_loss_iou": 0.5611572265625, + "eval_icons_loss_num": 0.036067962646484375, + "eval_icons_loss_xval": 0.180419921875, + "eval_icons_runtime": 84.429, + "eval_icons_samples_per_second": 0.592, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 145476476, + "step": 1500 + }, + { + "epoch": 0.14665623777864686, + "eval_screenspot_CIoU": 0.1673323300977548, + "eval_screenspot_GIoU": 0.14279627799987793, + "eval_screenspot_IoU": 0.29830242196718854, + "eval_screenspot_MAE_all": 0.20978017648061117, + "eval_screenspot_MAE_h": 0.14038154234488806, + "eval_screenspot_MAE_w": 0.3105545441309611, + "eval_screenspot_MAE_x": 0.2564181238412857, + "eval_screenspot_MAE_y": 0.13176647822062174, + "eval_screenspot_NUM_probability": 0.9999088048934937, + "eval_screenspot_inside_bbox": 0.5587499936421713, + "eval_screenspot_loss": 0.718844473361969, + "eval_screenspot_loss_ce": 0.0241878026475509, + "eval_screenspot_loss_iou": 0.3546142578125, + "eval_screenspot_loss_num": 0.141937255859375, + "eval_screenspot_loss_xval": 0.7089029947916666, + "eval_screenspot_runtime": 145.4833, + "eval_screenspot_samples_per_second": 0.612, + "eval_screenspot_steps_per_second": 0.021, + "num_input_tokens_seen": 145476476, + "step": 1500 + }, + { + "epoch": 0.14665623777864686, + "eval_compot_CIoU": 0.3811185359954834, + "eval_compot_GIoU": 0.3661044239997864, + "eval_compot_IoU": 0.46668554842472076, + "eval_compot_MAE_all": 0.11072687804698944, + "eval_compot_MAE_h": 0.10089898854494095, + "eval_compot_MAE_w": 0.12054969742894173, + "eval_compot_MAE_x": 0.11734602227807045, + "eval_compot_MAE_y": 0.10411283001303673, + "eval_compot_NUM_probability": 0.9999037384986877, + "eval_compot_inside_bbox": 0.6336805522441864, + "eval_compot_loss": 0.36955249309539795, + "eval_compot_loss_ce": 0.02781256940215826, + "eval_compot_loss_iou": 0.44708251953125, + "eval_compot_loss_num": 0.06495285034179688, + "eval_compot_loss_xval": 0.32452392578125, + "eval_compot_runtime": 95.5239, + "eval_compot_samples_per_second": 0.523, + "eval_compot_steps_per_second": 0.021, + "num_input_tokens_seen": 145476476, + "step": 1500 + }, + { + "epoch": 0.14675400860383261, + "grad_norm": 23.407716141403654, + "learning_rate": 5e-05, + "loss": 0.121, + "num_input_tokens_seen": 145574776, + "step": 1501 + }, + { + "epoch": 0.14675400860383261, + "loss": 0.11603444069623947, + "loss_ce": 0.006171158514916897, + "loss_iou": 0.53125, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 145574776, + "step": 1501 + }, + { + "epoch": 0.1468517794290184, + "grad_norm": 26.54277973343516, + "learning_rate": 5e-05, + "loss": 0.1288, + "num_input_tokens_seen": 145672308, + "step": 1502 + }, + { + "epoch": 0.1468517794290184, + "loss": 0.10892118513584137, + "loss_ce": 0.006015912629663944, + "loss_iou": 0.490234375, + "loss_num": 0.0205078125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 145672308, + "step": 1502 + }, + { + "epoch": 0.14694955025420414, + "grad_norm": 14.937468709273361, + "learning_rate": 5e-05, + "loss": 0.143, + "num_input_tokens_seen": 145770396, + "step": 1503 + }, + { + "epoch": 0.14694955025420414, + "loss": 0.1772148460149765, + "loss_ce": 0.010314201936125755, + "loss_iou": 0.388671875, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 145770396, + "step": 1503 + }, + { + "epoch": 0.14704732107938992, + "grad_norm": 4.543546893941664, + "learning_rate": 5e-05, + "loss": 0.0952, + "num_input_tokens_seen": 145866600, + "step": 1504 + }, + { + "epoch": 0.14704732107938992, + "loss": 0.10717739164829254, + "loss_ce": 0.003860135329887271, + "loss_iou": 0.34765625, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 145866600, + "step": 1504 + }, + { + "epoch": 0.14714509190457567, + "grad_norm": 9.387906445626674, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 145964168, + "step": 1505 + }, + { + "epoch": 0.14714509190457567, + "loss": 0.08909399062395096, + "loss_ce": 0.003888907842338085, + "loss_iou": 0.375, + "loss_num": 0.01708984375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 145964168, + "step": 1505 + }, + { + "epoch": 0.14724286272976145, + "grad_norm": 4.945556725061289, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 146061380, + "step": 1506 + }, + { + "epoch": 0.14724286272976145, + "loss": 0.05811968445777893, + "loss_ce": 0.0027150199748575687, + "loss_iou": 0.423828125, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 146061380, + "step": 1506 + }, + { + "epoch": 0.1473406335549472, + "grad_norm": 8.334438794946031, + "learning_rate": 5e-05, + "loss": 0.1271, + "num_input_tokens_seen": 146157320, + "step": 1507 + }, + { + "epoch": 0.1473406335549472, + "loss": 0.18337847292423248, + "loss_ce": 0.007414118852466345, + "loss_iou": 0.34765625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 146157320, + "step": 1507 + }, + { + "epoch": 0.14743840438013298, + "grad_norm": 4.020429004064455, + "learning_rate": 5e-05, + "loss": 0.1237, + "num_input_tokens_seen": 146254248, + "step": 1508 + }, + { + "epoch": 0.14743840438013298, + "loss": 0.13173800706863403, + "loss_ce": 0.007561973296105862, + "loss_iou": 0.341796875, + "loss_num": 0.02490234375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 146254248, + "step": 1508 + }, + { + "epoch": 0.14753617520531873, + "grad_norm": 5.584454264519735, + "learning_rate": 5e-05, + "loss": 0.1372, + "num_input_tokens_seen": 146350620, + "step": 1509 + }, + { + "epoch": 0.14753617520531873, + "loss": 0.14179086685180664, + "loss_ce": 0.0047059073112905025, + "loss_iou": 0.34765625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 146350620, + "step": 1509 + }, + { + "epoch": 0.1476339460305045, + "grad_norm": 5.289910052075246, + "learning_rate": 5e-05, + "loss": 0.134, + "num_input_tokens_seen": 146448020, + "step": 1510 + }, + { + "epoch": 0.1476339460305045, + "loss": 0.1333404779434204, + "loss_ce": 0.0018402265850454569, + "loss_iou": 0.34765625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 146448020, + "step": 1510 + }, + { + "epoch": 0.14773171685569025, + "grad_norm": 9.860038462394755, + "learning_rate": 5e-05, + "loss": 0.0949, + "num_input_tokens_seen": 146544852, + "step": 1511 + }, + { + "epoch": 0.14773171685569025, + "loss": 0.08366438746452332, + "loss_ce": 0.003860918339341879, + "loss_iou": 0.3828125, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 146544852, + "step": 1511 + }, + { + "epoch": 0.14782948768087603, + "grad_norm": 3.164488230840286, + "learning_rate": 5e-05, + "loss": 0.0899, + "num_input_tokens_seen": 146641908, + "step": 1512 + }, + { + "epoch": 0.14782948768087603, + "loss": 0.0861583799123764, + "loss_ce": 0.012687306851148605, + "loss_iou": 0.228515625, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 146641908, + "step": 1512 + }, + { + "epoch": 0.14792725850606178, + "grad_norm": 4.9389546203878645, + "learning_rate": 5e-05, + "loss": 0.1126, + "num_input_tokens_seen": 146738036, + "step": 1513 + }, + { + "epoch": 0.14792725850606178, + "loss": 0.09806270897388458, + "loss_ce": 0.008905601687729359, + "loss_iou": 0.185546875, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 146738036, + "step": 1513 + }, + { + "epoch": 0.14802502933124756, + "grad_norm": 8.699361929435044, + "learning_rate": 5e-05, + "loss": 0.1167, + "num_input_tokens_seen": 146834272, + "step": 1514 + }, + { + "epoch": 0.14802502933124756, + "loss": 0.12333767116069794, + "loss_ce": 0.005722927860915661, + "loss_iou": 0.4140625, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 146834272, + "step": 1514 + }, + { + "epoch": 0.1481228001564333, + "grad_norm": 8.585695914454112, + "learning_rate": 5e-05, + "loss": 0.1033, + "num_input_tokens_seen": 146931644, + "step": 1515 + }, + { + "epoch": 0.1481228001564333, + "loss": 0.10925552994012833, + "loss_ce": 0.0011927831219509244, + "loss_iou": 0.435546875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 146931644, + "step": 1515 + }, + { + "epoch": 0.1482205709816191, + "grad_norm": 34.99016744479975, + "learning_rate": 5e-05, + "loss": 0.1489, + "num_input_tokens_seen": 147028340, + "step": 1516 + }, + { + "epoch": 0.1482205709816191, + "loss": 0.12809139490127563, + "loss_ce": 0.006600908003747463, + "loss_iou": 0.25, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 147028340, + "step": 1516 + }, + { + "epoch": 0.14831834180680484, + "grad_norm": 11.326012341792685, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 147124568, + "step": 1517 + }, + { + "epoch": 0.14831834180680484, + "loss": 0.0734841525554657, + "loss_ce": 0.004384727217257023, + "loss_iou": 0.34375, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 147124568, + "step": 1517 + }, + { + "epoch": 0.14841611263199062, + "grad_norm": 6.984559207321551, + "learning_rate": 5e-05, + "loss": 0.0838, + "num_input_tokens_seen": 147221264, + "step": 1518 + }, + { + "epoch": 0.14841611263199062, + "loss": 0.08102332800626755, + "loss_ce": 0.007964248768985271, + "loss_iou": 0.408203125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 147221264, + "step": 1518 + }, + { + "epoch": 0.14851388345717637, + "grad_norm": 6.134290468658458, + "learning_rate": 5e-05, + "loss": 0.0981, + "num_input_tokens_seen": 147318492, + "step": 1519 + }, + { + "epoch": 0.14851388345717637, + "loss": 0.10241842269897461, + "loss_ce": 0.007035728543996811, + "loss_iou": 0.275390625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 147318492, + "step": 1519 + }, + { + "epoch": 0.14861165428236214, + "grad_norm": 11.374993483262486, + "learning_rate": 5e-05, + "loss": 0.139, + "num_input_tokens_seen": 147415964, + "step": 1520 + }, + { + "epoch": 0.14861165428236214, + "loss": 0.14964058995246887, + "loss_ce": 0.007611787877976894, + "loss_iou": 0.404296875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 147415964, + "step": 1520 + }, + { + "epoch": 0.14870942510754792, + "grad_norm": 10.942872385716287, + "learning_rate": 5e-05, + "loss": 0.1304, + "num_input_tokens_seen": 147513088, + "step": 1521 + }, + { + "epoch": 0.14870942510754792, + "loss": 0.16546577215194702, + "loss_ce": 0.004363487474620342, + "loss_iou": 0.404296875, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 147513088, + "step": 1521 + }, + { + "epoch": 0.14880719593273367, + "grad_norm": 10.852495159818485, + "learning_rate": 5e-05, + "loss": 0.101, + "num_input_tokens_seen": 147610304, + "step": 1522 + }, + { + "epoch": 0.14880719593273367, + "loss": 0.06083395332098007, + "loss_ce": 0.0046053193509578705, + "loss_iou": 0.3359375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 147610304, + "step": 1522 + }, + { + "epoch": 0.14890496675791945, + "grad_norm": 8.245543652275973, + "learning_rate": 5e-05, + "loss": 0.124, + "num_input_tokens_seen": 147707700, + "step": 1523 + }, + { + "epoch": 0.14890496675791945, + "loss": 0.14964279532432556, + "loss_ce": 0.002242889953777194, + "loss_iou": 0.244140625, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 147707700, + "step": 1523 + }, + { + "epoch": 0.1490027375831052, + "grad_norm": 18.938756116132193, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 147803784, + "step": 1524 + }, + { + "epoch": 0.1490027375831052, + "loss": 0.06540262699127197, + "loss_ce": 0.007510783616453409, + "loss_iou": 0.236328125, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 147803784, + "step": 1524 + }, + { + "epoch": 0.14910050840829098, + "grad_norm": 4.127576701540956, + "learning_rate": 5e-05, + "loss": 0.12, + "num_input_tokens_seen": 147900924, + "step": 1525 + }, + { + "epoch": 0.14910050840829098, + "loss": 0.10675205290317535, + "loss_ce": 0.006135600619018078, + "loss_iou": 0.36328125, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 147900924, + "step": 1525 + }, + { + "epoch": 0.14919827923347673, + "grad_norm": 4.496399208929655, + "learning_rate": 5e-05, + "loss": 0.1182, + "num_input_tokens_seen": 147999244, + "step": 1526 + }, + { + "epoch": 0.14919827923347673, + "loss": 0.15255779027938843, + "loss_ce": 0.006317547522485256, + "loss_iou": 0.44921875, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 147999244, + "step": 1526 + }, + { + "epoch": 0.1492960500586625, + "grad_norm": 10.246146341342742, + "learning_rate": 5e-05, + "loss": 0.1144, + "num_input_tokens_seen": 148096248, + "step": 1527 + }, + { + "epoch": 0.1492960500586625, + "loss": 0.08792725950479507, + "loss_ce": 0.011663831770420074, + "loss_iou": 0.30859375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 148096248, + "step": 1527 + }, + { + "epoch": 0.14939382088384826, + "grad_norm": 11.586077054738759, + "learning_rate": 5e-05, + "loss": 0.0932, + "num_input_tokens_seen": 148193208, + "step": 1528 + }, + { + "epoch": 0.14939382088384826, + "loss": 0.08240008354187012, + "loss_ce": 0.007586240768432617, + "loss_iou": 0.283203125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 148193208, + "step": 1528 + }, + { + "epoch": 0.14949159170903403, + "grad_norm": 10.402901496345503, + "learning_rate": 5e-05, + "loss": 0.1559, + "num_input_tokens_seen": 148290988, + "step": 1529 + }, + { + "epoch": 0.14949159170903403, + "loss": 0.15732356905937195, + "loss_ce": 0.0029656486585736275, + "loss_iou": 0.40625, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 148290988, + "step": 1529 + }, + { + "epoch": 0.14958936253421978, + "grad_norm": 13.387353941581917, + "learning_rate": 5e-05, + "loss": 0.1641, + "num_input_tokens_seen": 148387880, + "step": 1530 + }, + { + "epoch": 0.14958936253421978, + "loss": 0.1425780951976776, + "loss_ce": 0.011383028700947762, + "loss_iou": 0.3046875, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 148387880, + "step": 1530 + }, + { + "epoch": 0.14968713335940556, + "grad_norm": 14.941458037939258, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 148484340, + "step": 1531 + }, + { + "epoch": 0.14968713335940556, + "loss": 0.07142981886863708, + "loss_ce": 0.0014682693872600794, + "loss_iou": 0.396484375, + "loss_num": 0.0140380859375, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 148484340, + "step": 1531 + }, + { + "epoch": 0.1497849041845913, + "grad_norm": 3.2310588435239986, + "learning_rate": 5e-05, + "loss": 0.155, + "num_input_tokens_seen": 148581560, + "step": 1532 + }, + { + "epoch": 0.1497849041845913, + "loss": 0.12840516865253448, + "loss_ce": 0.005846572574228048, + "loss_iou": 0.2890625, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 148581560, + "step": 1532 + }, + { + "epoch": 0.1498826750097771, + "grad_norm": 13.558405731632538, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 148678704, + "step": 1533 + }, + { + "epoch": 0.1498826750097771, + "loss": 0.07101841270923615, + "loss_ce": 0.011387061327695847, + "loss_iou": 0.275390625, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 148678704, + "step": 1533 + }, + { + "epoch": 0.14998044583496284, + "grad_norm": 6.8568892655071805, + "learning_rate": 5e-05, + "loss": 0.1207, + "num_input_tokens_seen": 148776488, + "step": 1534 + }, + { + "epoch": 0.14998044583496284, + "loss": 0.09149959683418274, + "loss_ce": 0.0028002606704831123, + "loss_iou": 0.384765625, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 148776488, + "step": 1534 + }, + { + "epoch": 0.15007821666014862, + "grad_norm": 31.929314599808215, + "learning_rate": 5e-05, + "loss": 0.1342, + "num_input_tokens_seen": 148874088, + "step": 1535 + }, + { + "epoch": 0.15007821666014862, + "loss": 0.13391843438148499, + "loss_ce": 0.010474837385118008, + "loss_iou": 0.361328125, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 148874088, + "step": 1535 + }, + { + "epoch": 0.15017598748533437, + "grad_norm": 19.769801373205464, + "learning_rate": 5e-05, + "loss": 0.1723, + "num_input_tokens_seen": 148971236, + "step": 1536 + }, + { + "epoch": 0.15017598748533437, + "loss": 0.16343210637569427, + "loss_ce": 0.0076703946106135845, + "loss_iou": 0.40234375, + "loss_num": 0.0311279296875, + "loss_xval": 0.15625, + "num_input_tokens_seen": 148971236, + "step": 1536 + }, + { + "epoch": 0.15027375831052014, + "grad_norm": 48.006643654917404, + "learning_rate": 5e-05, + "loss": 0.1248, + "num_input_tokens_seen": 149068704, + "step": 1537 + }, + { + "epoch": 0.15027375831052014, + "loss": 0.1367904096841812, + "loss_ce": 0.008952276781201363, + "loss_iou": 0.314453125, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 149068704, + "step": 1537 + }, + { + "epoch": 0.1503715291357059, + "grad_norm": 10.819031197706146, + "learning_rate": 5e-05, + "loss": 0.1032, + "num_input_tokens_seen": 149164900, + "step": 1538 + }, + { + "epoch": 0.1503715291357059, + "loss": 0.13493216037750244, + "loss_ce": 0.006788852624595165, + "loss_iou": 0.3125, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 149164900, + "step": 1538 + }, + { + "epoch": 0.15046929996089167, + "grad_norm": 7.660954579515355, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 149262312, + "step": 1539 + }, + { + "epoch": 0.15046929996089167, + "loss": 0.10389664024114609, + "loss_ce": 0.007705232594162226, + "loss_iou": 0.28515625, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 149262312, + "step": 1539 + }, + { + "epoch": 0.15056707078607742, + "grad_norm": 11.084932442734397, + "learning_rate": 5e-05, + "loss": 0.1086, + "num_input_tokens_seen": 149359652, + "step": 1540 + }, + { + "epoch": 0.15056707078607742, + "loss": 0.10311703383922577, + "loss_ce": 0.009252590127289295, + "loss_iou": 0.388671875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 149359652, + "step": 1540 + }, + { + "epoch": 0.1506648416112632, + "grad_norm": 8.097688161736391, + "learning_rate": 5e-05, + "loss": 0.1143, + "num_input_tokens_seen": 149456764, + "step": 1541 + }, + { + "epoch": 0.1506648416112632, + "loss": 0.1364637166261673, + "loss_ce": 0.006474093999713659, + "loss_iou": 0.39453125, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 149456764, + "step": 1541 + }, + { + "epoch": 0.15076261243644895, + "grad_norm": 13.647686998244868, + "learning_rate": 5e-05, + "loss": 0.1126, + "num_input_tokens_seen": 149553184, + "step": 1542 + }, + { + "epoch": 0.15076261243644895, + "loss": 0.12348343431949615, + "loss_ce": 0.00744415819644928, + "loss_iou": 0.326171875, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 149553184, + "step": 1542 + }, + { + "epoch": 0.15086038326163473, + "grad_norm": 2.9893864041969898, + "learning_rate": 5e-05, + "loss": 0.0928, + "num_input_tokens_seen": 149649968, + "step": 1543 + }, + { + "epoch": 0.15086038326163473, + "loss": 0.10051990300416946, + "loss_ce": 0.00829578097909689, + "loss_iou": 0.443359375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 149649968, + "step": 1543 + }, + { + "epoch": 0.1509581540868205, + "grad_norm": 12.979804862140462, + "learning_rate": 5e-05, + "loss": 0.1216, + "num_input_tokens_seen": 149746940, + "step": 1544 + }, + { + "epoch": 0.1509581540868205, + "loss": 0.13485172390937805, + "loss_ce": 0.008020667359232903, + "loss_iou": 0.419921875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 149746940, + "step": 1544 + }, + { + "epoch": 0.15105592491200626, + "grad_norm": 26.55125889570177, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 149843332, + "step": 1545 + }, + { + "epoch": 0.15105592491200626, + "loss": 0.09934716671705246, + "loss_ce": 0.0056887222453951836, + "loss_iou": 0.3671875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 149843332, + "step": 1545 + }, + { + "epoch": 0.15115369573719203, + "grad_norm": 6.587476075216688, + "learning_rate": 5e-05, + "loss": 0.0745, + "num_input_tokens_seen": 149939892, + "step": 1546 + }, + { + "epoch": 0.15115369573719203, + "loss": 0.06722968816757202, + "loss_ce": 0.006980363745242357, + "loss_iou": 0.33984375, + "loss_num": 0.01202392578125, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 149939892, + "step": 1546 + }, + { + "epoch": 0.15125146656237778, + "grad_norm": 7.285204892942371, + "learning_rate": 5e-05, + "loss": 0.1182, + "num_input_tokens_seen": 150037224, + "step": 1547 + }, + { + "epoch": 0.15125146656237778, + "loss": 0.13452869653701782, + "loss_ce": 0.010078012011945248, + "loss_iou": 0.328125, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 150037224, + "step": 1547 + }, + { + "epoch": 0.15134923738756356, + "grad_norm": 15.397817457182747, + "learning_rate": 5e-05, + "loss": 0.1073, + "num_input_tokens_seen": 150134380, + "step": 1548 + }, + { + "epoch": 0.15134923738756356, + "loss": 0.08925479650497437, + "loss_ce": 0.01230472233146429, + "loss_iou": 0.3671875, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 150134380, + "step": 1548 + }, + { + "epoch": 0.1514470082127493, + "grad_norm": 6.255635284692799, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 150230780, + "step": 1549 + }, + { + "epoch": 0.1514470082127493, + "loss": 0.09134595841169357, + "loss_ce": 0.0031043821945786476, + "loss_iou": 0.265625, + "loss_num": 0.017578125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 150230780, + "step": 1549 + }, + { + "epoch": 0.1515447790379351, + "grad_norm": 3.7303824579931897, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 150327604, + "step": 1550 + }, + { + "epoch": 0.1515447790379351, + "loss": 0.08123214542865753, + "loss_ce": 0.005884241312742233, + "loss_iou": 0.279296875, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 150327604, + "step": 1550 + }, + { + "epoch": 0.15164254986312084, + "grad_norm": 16.56255580492686, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 150423604, + "step": 1551 + }, + { + "epoch": 0.15164254986312084, + "loss": 0.06496047228574753, + "loss_ce": 0.004467004910111427, + "loss_iou": 0.279296875, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 150423604, + "step": 1551 + }, + { + "epoch": 0.15174032068830662, + "grad_norm": 16.714860290348728, + "learning_rate": 5e-05, + "loss": 0.1114, + "num_input_tokens_seen": 150522560, + "step": 1552 + }, + { + "epoch": 0.15174032068830662, + "loss": 0.11842668056488037, + "loss_ce": 0.007205368485301733, + "loss_iou": 0.431640625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 150522560, + "step": 1552 + }, + { + "epoch": 0.15183809151349237, + "grad_norm": 13.842898024752728, + "learning_rate": 5e-05, + "loss": 0.1267, + "num_input_tokens_seen": 150620088, + "step": 1553 + }, + { + "epoch": 0.15183809151349237, + "loss": 0.14439091086387634, + "loss_ce": 0.006741378456354141, + "loss_iou": 0.4453125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 150620088, + "step": 1553 + }, + { + "epoch": 0.15193586233867815, + "grad_norm": 16.58548786231136, + "learning_rate": 5e-05, + "loss": 0.1145, + "num_input_tokens_seen": 150717860, + "step": 1554 + }, + { + "epoch": 0.15193586233867815, + "loss": 0.1394449770450592, + "loss_ce": 0.004313129931688309, + "loss_iou": 0.328125, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 150717860, + "step": 1554 + }, + { + "epoch": 0.1520336331638639, + "grad_norm": 17.597705000069947, + "learning_rate": 5e-05, + "loss": 0.15, + "num_input_tokens_seen": 150814384, + "step": 1555 + }, + { + "epoch": 0.1520336331638639, + "loss": 0.09630820155143738, + "loss_ce": 0.005335300229489803, + "loss_iou": 0.515625, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 150814384, + "step": 1555 + }, + { + "epoch": 0.15213140398904967, + "grad_norm": 4.83867429246634, + "learning_rate": 5e-05, + "loss": 0.1211, + "num_input_tokens_seen": 150910700, + "step": 1556 + }, + { + "epoch": 0.15213140398904967, + "loss": 0.12709173560142517, + "loss_ce": 0.004319518804550171, + "loss_iou": 0.55078125, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 150910700, + "step": 1556 + }, + { + "epoch": 0.15222917481423542, + "grad_norm": 3.980591353196268, + "learning_rate": 5e-05, + "loss": 0.1224, + "num_input_tokens_seen": 151008064, + "step": 1557 + }, + { + "epoch": 0.15222917481423542, + "loss": 0.12346760928630829, + "loss_ce": 0.008645227178931236, + "loss_iou": 0.361328125, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 151008064, + "step": 1557 + }, + { + "epoch": 0.1523269456394212, + "grad_norm": 2.8560290524922602, + "learning_rate": 5e-05, + "loss": 0.096, + "num_input_tokens_seen": 151104468, + "step": 1558 + }, + { + "epoch": 0.1523269456394212, + "loss": 0.0884302407503128, + "loss_ce": 0.006994088180363178, + "loss_iou": 0.21875, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 151104468, + "step": 1558 + }, + { + "epoch": 0.15242471646460695, + "grad_norm": 12.504160334686102, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 151201180, + "step": 1559 + }, + { + "epoch": 0.15242471646460695, + "loss": 0.09111149609088898, + "loss_ce": 0.007142374292016029, + "loss_iou": 0.490234375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 151201180, + "step": 1559 + }, + { + "epoch": 0.15252248728979273, + "grad_norm": 24.177093230666692, + "learning_rate": 5e-05, + "loss": 0.1047, + "num_input_tokens_seen": 151297744, + "step": 1560 + }, + { + "epoch": 0.15252248728979273, + "loss": 0.06955265253782272, + "loss_ce": 0.007978669367730618, + "loss_iou": 0.3046875, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 151297744, + "step": 1560 + }, + { + "epoch": 0.15262025811497848, + "grad_norm": 14.640172374442479, + "learning_rate": 5e-05, + "loss": 0.1219, + "num_input_tokens_seen": 151395528, + "step": 1561 + }, + { + "epoch": 0.15262025811497848, + "loss": 0.15259242057800293, + "loss_ce": 0.007252449169754982, + "loss_iou": 0.4453125, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 151395528, + "step": 1561 + }, + { + "epoch": 0.15271802894016426, + "grad_norm": 18.98511176121752, + "learning_rate": 5e-05, + "loss": 0.1182, + "num_input_tokens_seen": 151492432, + "step": 1562 + }, + { + "epoch": 0.15271802894016426, + "loss": 0.08778255432844162, + "loss_ce": 0.007063563913106918, + "loss_iou": 0.34375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 151492432, + "step": 1562 + }, + { + "epoch": 0.15281579976535, + "grad_norm": 17.75150662218327, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 151590136, + "step": 1563 + }, + { + "epoch": 0.15281579976535, + "loss": 0.09133519232273102, + "loss_ce": 0.0061911484226584435, + "loss_iou": 0.375, + "loss_num": 0.01708984375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 151590136, + "step": 1563 + }, + { + "epoch": 0.15291357059053579, + "grad_norm": 10.556844333311886, + "learning_rate": 5e-05, + "loss": 0.123, + "num_input_tokens_seen": 151686976, + "step": 1564 + }, + { + "epoch": 0.15291357059053579, + "loss": 0.11106756329536438, + "loss_ce": 0.003920342773199081, + "loss_iou": 0.435546875, + "loss_num": 0.021484375, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 151686976, + "step": 1564 + }, + { + "epoch": 0.15301134141572154, + "grad_norm": 5.2165056080552965, + "learning_rate": 5e-05, + "loss": 0.0956, + "num_input_tokens_seen": 151783892, + "step": 1565 + }, + { + "epoch": 0.15301134141572154, + "loss": 0.08734732866287231, + "loss_ce": 0.010565108619630337, + "loss_iou": 0.240234375, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 151783892, + "step": 1565 + }, + { + "epoch": 0.1531091122409073, + "grad_norm": 5.516362971232098, + "learning_rate": 5e-05, + "loss": 0.1018, + "num_input_tokens_seen": 151881508, + "step": 1566 + }, + { + "epoch": 0.1531091122409073, + "loss": 0.07594439387321472, + "loss_ce": 0.006700006779283285, + "loss_iou": 0.298828125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 151881508, + "step": 1566 + }, + { + "epoch": 0.1532068830660931, + "grad_norm": 14.125978920154905, + "learning_rate": 5e-05, + "loss": 0.1131, + "num_input_tokens_seen": 151979252, + "step": 1567 + }, + { + "epoch": 0.1532068830660931, + "loss": 0.14323778450489044, + "loss_ce": 0.0033299524802714586, + "loss_iou": 0.390625, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 151979252, + "step": 1567 + }, + { + "epoch": 0.15330465389127884, + "grad_norm": 36.06631472504221, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 152076264, + "step": 1568 + }, + { + "epoch": 0.15330465389127884, + "loss": 0.08768496662378311, + "loss_ce": 0.009758330881595612, + "loss_iou": 0.28515625, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 152076264, + "step": 1568 + }, + { + "epoch": 0.15340242471646462, + "grad_norm": 7.298761638571312, + "learning_rate": 5e-05, + "loss": 0.1391, + "num_input_tokens_seen": 152173160, + "step": 1569 + }, + { + "epoch": 0.15340242471646462, + "loss": 0.1756928265094757, + "loss_ce": 0.008761678822338581, + "loss_iou": 0.5546875, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 152173160, + "step": 1569 + }, + { + "epoch": 0.15350019554165037, + "grad_norm": 3.766671258715562, + "learning_rate": 5e-05, + "loss": 0.1025, + "num_input_tokens_seen": 152269560, + "step": 1570 + }, + { + "epoch": 0.15350019554165037, + "loss": 0.10252499580383301, + "loss_ce": 0.004487270954996347, + "loss_iou": 0.466796875, + "loss_num": 0.01953125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 152269560, + "step": 1570 + }, + { + "epoch": 0.15359796636683615, + "grad_norm": 10.365236443260773, + "learning_rate": 5e-05, + "loss": 0.1107, + "num_input_tokens_seen": 152366788, + "step": 1571 + }, + { + "epoch": 0.15359796636683615, + "loss": 0.08673125505447388, + "loss_ce": 0.0025027382653206587, + "loss_iou": 0.41015625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 152366788, + "step": 1571 + }, + { + "epoch": 0.1536957371920219, + "grad_norm": 5.29055440614751, + "learning_rate": 5e-05, + "loss": 0.0979, + "num_input_tokens_seen": 152463860, + "step": 1572 + }, + { + "epoch": 0.1536957371920219, + "loss": 0.07052250951528549, + "loss_ce": 0.003429615404456854, + "loss_iou": 0.38671875, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 152463860, + "step": 1572 + }, + { + "epoch": 0.15379350801720768, + "grad_norm": 10.079451096567437, + "learning_rate": 5e-05, + "loss": 0.0911, + "num_input_tokens_seen": 152560440, + "step": 1573 + }, + { + "epoch": 0.15379350801720768, + "loss": 0.07532379031181335, + "loss_ce": 0.006125183310359716, + "loss_iou": 0.3515625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 152560440, + "step": 1573 + }, + { + "epoch": 0.15389127884239343, + "grad_norm": 6.685573393428635, + "learning_rate": 5e-05, + "loss": 0.0758, + "num_input_tokens_seen": 152657668, + "step": 1574 + }, + { + "epoch": 0.15389127884239343, + "loss": 0.07054348289966583, + "loss_ce": 0.006273460574448109, + "loss_iou": 0.392578125, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 152657668, + "step": 1574 + }, + { + "epoch": 0.1539890496675792, + "grad_norm": 14.647311289614727, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 152754808, + "step": 1575 + }, + { + "epoch": 0.1539890496675792, + "loss": 0.09079517424106598, + "loss_ce": 0.0038811112754046917, + "loss_iou": 0.52734375, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 152754808, + "step": 1575 + }, + { + "epoch": 0.15408682049276495, + "grad_norm": 7.0545033091477825, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 152851184, + "step": 1576 + }, + { + "epoch": 0.15408682049276495, + "loss": 0.08836618065834045, + "loss_ce": 0.004206322133541107, + "loss_iou": 0.23046875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 152851184, + "step": 1576 + }, + { + "epoch": 0.15418459131795073, + "grad_norm": 4.73170474076515, + "learning_rate": 5e-05, + "loss": 0.1435, + "num_input_tokens_seen": 152948260, + "step": 1577 + }, + { + "epoch": 0.15418459131795073, + "loss": 0.2074461132287979, + "loss_ce": 0.009692208841443062, + "loss_iou": 0.32421875, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 152948260, + "step": 1577 + }, + { + "epoch": 0.15428236214313648, + "grad_norm": 5.044602409434542, + "learning_rate": 5e-05, + "loss": 0.1041, + "num_input_tokens_seen": 153045208, + "step": 1578 + }, + { + "epoch": 0.15428236214313648, + "loss": 0.08018647879362106, + "loss_ce": 0.009019485674798489, + "loss_iou": 0.341796875, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 153045208, + "step": 1578 + }, + { + "epoch": 0.15438013296832226, + "grad_norm": 5.3359870517270735, + "learning_rate": 5e-05, + "loss": 0.1155, + "num_input_tokens_seen": 153142700, + "step": 1579 + }, + { + "epoch": 0.15438013296832226, + "loss": 0.12385587394237518, + "loss_ce": 0.007110876962542534, + "loss_iou": 0.337890625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 153142700, + "step": 1579 + }, + { + "epoch": 0.154477903793508, + "grad_norm": 6.228752942845648, + "learning_rate": 5e-05, + "loss": 0.0896, + "num_input_tokens_seen": 153239604, + "step": 1580 + }, + { + "epoch": 0.154477903793508, + "loss": 0.09823191165924072, + "loss_ce": 0.00381052796728909, + "loss_iou": 0.3828125, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 153239604, + "step": 1580 + }, + { + "epoch": 0.1545756746186938, + "grad_norm": 3.07080113304378, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 153335136, + "step": 1581 + }, + { + "epoch": 0.1545756746186938, + "loss": 0.09742722660303116, + "loss_ce": 0.006011825054883957, + "loss_iou": 0.26171875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 153335136, + "step": 1581 + }, + { + "epoch": 0.15467344544387954, + "grad_norm": 5.7384539852314855, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 153431688, + "step": 1582 + }, + { + "epoch": 0.15467344544387954, + "loss": 0.07981909066438675, + "loss_ce": 0.007583986967802048, + "loss_iou": 0.27734375, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 153431688, + "step": 1582 + }, + { + "epoch": 0.15477121626906531, + "grad_norm": 10.372302655463606, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 153528024, + "step": 1583 + }, + { + "epoch": 0.15477121626906531, + "loss": 0.12775835394859314, + "loss_ce": 0.005428654607385397, + "loss_iou": 0.326171875, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 153528024, + "step": 1583 + }, + { + "epoch": 0.15486898709425106, + "grad_norm": 36.379664465685615, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 153625024, + "step": 1584 + }, + { + "epoch": 0.15486898709425106, + "loss": 0.1086997240781784, + "loss_ce": 0.015224387869238853, + "loss_iou": 0.326171875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 153625024, + "step": 1584 + }, + { + "epoch": 0.15496675791943684, + "grad_norm": 8.48806112592423, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 153721692, + "step": 1585 + }, + { + "epoch": 0.15496675791943684, + "loss": 0.06478653848171234, + "loss_ce": 0.002233130158856511, + "loss_iou": 0.244140625, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 153721692, + "step": 1585 + }, + { + "epoch": 0.1550645287446226, + "grad_norm": 3.946694801136259, + "learning_rate": 5e-05, + "loss": 0.0556, + "num_input_tokens_seen": 153819316, + "step": 1586 + }, + { + "epoch": 0.1550645287446226, + "loss": 0.06586581468582153, + "loss_ce": 0.006112393923103809, + "loss_iou": 0.322265625, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 153819316, + "step": 1586 + }, + { + "epoch": 0.15516229956980837, + "grad_norm": 22.857666016911047, + "learning_rate": 5e-05, + "loss": 0.1147, + "num_input_tokens_seen": 153916024, + "step": 1587 + }, + { + "epoch": 0.15516229956980837, + "loss": 0.12180617451667786, + "loss_ce": 0.007578878663480282, + "loss_iou": 0.33984375, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 153916024, + "step": 1587 + }, + { + "epoch": 0.15526007039499412, + "grad_norm": 17.12575454148594, + "learning_rate": 5e-05, + "loss": 0.1416, + "num_input_tokens_seen": 154012264, + "step": 1588 + }, + { + "epoch": 0.15526007039499412, + "loss": 0.1108904480934143, + "loss_ce": 0.005330151878297329, + "loss_iou": 0.27734375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 154012264, + "step": 1588 + }, + { + "epoch": 0.1553578412201799, + "grad_norm": 7.938216756547848, + "learning_rate": 5e-05, + "loss": 0.1028, + "num_input_tokens_seen": 154108432, + "step": 1589 + }, + { + "epoch": 0.1553578412201799, + "loss": 0.0767570361495018, + "loss_ce": 0.00401839055120945, + "loss_iou": 0.330078125, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 154108432, + "step": 1589 + }, + { + "epoch": 0.15545561204536568, + "grad_norm": 10.495216625776047, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 154205612, + "step": 1590 + }, + { + "epoch": 0.15545561204536568, + "loss": 0.08359859138727188, + "loss_ce": 0.0036120188888162374, + "loss_iou": 0.51953125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 154205612, + "step": 1590 + }, + { + "epoch": 0.15555338287055143, + "grad_norm": 17.238587663297622, + "learning_rate": 5e-05, + "loss": 0.101, + "num_input_tokens_seen": 154302756, + "step": 1591 + }, + { + "epoch": 0.15555338287055143, + "loss": 0.105172298848629, + "loss_ce": 0.00516619672998786, + "loss_iou": 0.3984375, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 154302756, + "step": 1591 + }, + { + "epoch": 0.1556511536957372, + "grad_norm": 8.904725642748547, + "learning_rate": 5e-05, + "loss": 0.1059, + "num_input_tokens_seen": 154399796, + "step": 1592 + }, + { + "epoch": 0.1556511536957372, + "loss": 0.1225619986653328, + "loss_ce": 0.003634998109191656, + "loss_iou": 0.423828125, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 154399796, + "step": 1592 + }, + { + "epoch": 0.15574892452092295, + "grad_norm": 13.818752095183289, + "learning_rate": 5e-05, + "loss": 0.112, + "num_input_tokens_seen": 154497400, + "step": 1593 + }, + { + "epoch": 0.15574892452092295, + "loss": 0.16247713565826416, + "loss_ce": 0.0049454038962721825, + "loss_iou": 0.37890625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 154497400, + "step": 1593 + }, + { + "epoch": 0.15584669534610873, + "grad_norm": 5.412747477058009, + "learning_rate": 5e-05, + "loss": 0.1091, + "num_input_tokens_seen": 154594592, + "step": 1594 + }, + { + "epoch": 0.15584669534610873, + "loss": 0.10669542849063873, + "loss_ce": 0.005773797631263733, + "loss_iou": 0.376953125, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 154594592, + "step": 1594 + }, + { + "epoch": 0.15594446617129448, + "grad_norm": 11.62494451717172, + "learning_rate": 5e-05, + "loss": 0.1299, + "num_input_tokens_seen": 154691940, + "step": 1595 + }, + { + "epoch": 0.15594446617129448, + "loss": 0.1052195206284523, + "loss_ce": 0.0041758231818675995, + "loss_iou": 0.37890625, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 154691940, + "step": 1595 + }, + { + "epoch": 0.15604223699648026, + "grad_norm": 8.342850383991147, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 154789328, + "step": 1596 + }, + { + "epoch": 0.15604223699648026, + "loss": 0.07489323616027832, + "loss_ce": 0.004107712768018246, + "loss_iou": 0.33984375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 154789328, + "step": 1596 + }, + { + "epoch": 0.156140007821666, + "grad_norm": 5.997518511770054, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 154886384, + "step": 1597 + }, + { + "epoch": 0.156140007821666, + "loss": 0.07840841263532639, + "loss_ce": 0.01076620165258646, + "loss_iou": 0.37890625, + "loss_num": 0.01348876953125, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 154886384, + "step": 1597 + }, + { + "epoch": 0.1562377786468518, + "grad_norm": 10.75642800856716, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 154983756, + "step": 1598 + }, + { + "epoch": 0.1562377786468518, + "loss": 0.09598623216152191, + "loss_ce": 0.004494538530707359, + "loss_iou": 0.416015625, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 154983756, + "step": 1598 + }, + { + "epoch": 0.15633554947203754, + "grad_norm": 9.85497350068464, + "learning_rate": 5e-05, + "loss": 0.1198, + "num_input_tokens_seen": 155082096, + "step": 1599 + }, + { + "epoch": 0.15633554947203754, + "loss": 0.12301520258188248, + "loss_ce": 0.009245678782463074, + "loss_iou": 0.447265625, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 155082096, + "step": 1599 + }, + { + "epoch": 0.15643332029722332, + "grad_norm": 13.336478111337344, + "learning_rate": 5e-05, + "loss": 0.1383, + "num_input_tokens_seen": 155179016, + "step": 1600 + }, + { + "epoch": 0.15643332029722332, + "loss": 0.15856528282165527, + "loss_ce": 0.0037801319267600775, + "loss_iou": 0.41015625, + "loss_num": 0.031005859375, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 155179016, + "step": 1600 + }, + { + "epoch": 0.15653109112240907, + "grad_norm": 13.018136913963033, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 155275812, + "step": 1601 + }, + { + "epoch": 0.15653109112240907, + "loss": 0.0916433185338974, + "loss_ce": 0.002112369518727064, + "loss_iou": 0.375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 155275812, + "step": 1601 + }, + { + "epoch": 0.15662886194759484, + "grad_norm": 14.13604210354207, + "learning_rate": 5e-05, + "loss": 0.1215, + "num_input_tokens_seen": 155372768, + "step": 1602 + }, + { + "epoch": 0.15662886194759484, + "loss": 0.07724316418170929, + "loss_ce": 0.005114873871207237, + "loss_iou": 0.34375, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 155372768, + "step": 1602 + }, + { + "epoch": 0.1567266327727806, + "grad_norm": 21.715941362508417, + "learning_rate": 5e-05, + "loss": 0.1214, + "num_input_tokens_seen": 155469696, + "step": 1603 + }, + { + "epoch": 0.1567266327727806, + "loss": 0.1034725159406662, + "loss_ce": 0.007281109225004911, + "loss_iou": 0.33984375, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 155469696, + "step": 1603 + }, + { + "epoch": 0.15682440359796637, + "grad_norm": 10.494690580888626, + "learning_rate": 5e-05, + "loss": 0.144, + "num_input_tokens_seen": 155566844, + "step": 1604 + }, + { + "epoch": 0.15682440359796637, + "loss": 0.10338228940963745, + "loss_ce": 0.006031213328242302, + "loss_iou": 0.412109375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 155566844, + "step": 1604 + }, + { + "epoch": 0.15692217442315212, + "grad_norm": 7.392636269638259, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 155664380, + "step": 1605 + }, + { + "epoch": 0.15692217442315212, + "loss": 0.06295498460531235, + "loss_ce": 0.004590116906911135, + "loss_iou": 0.37890625, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 155664380, + "step": 1605 + }, + { + "epoch": 0.1570199452483379, + "grad_norm": 14.1287776614331, + "learning_rate": 5e-05, + "loss": 0.1, + "num_input_tokens_seen": 155762104, + "step": 1606 + }, + { + "epoch": 0.1570199452483379, + "loss": 0.10670720785856247, + "loss_ce": 0.0035272736568003893, + "loss_iou": 0.3125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 155762104, + "step": 1606 + }, + { + "epoch": 0.15711771607352365, + "grad_norm": 12.069384047276843, + "learning_rate": 5e-05, + "loss": 0.1077, + "num_input_tokens_seen": 155860624, + "step": 1607 + }, + { + "epoch": 0.15711771607352365, + "loss": 0.11905118077993393, + "loss_ce": 0.00766201876103878, + "loss_iou": 0.44140625, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 155860624, + "step": 1607 + }, + { + "epoch": 0.15721548689870943, + "grad_norm": 9.356110217654933, + "learning_rate": 5e-05, + "loss": 0.107, + "num_input_tokens_seen": 155957132, + "step": 1608 + }, + { + "epoch": 0.15721548689870943, + "loss": 0.12556371092796326, + "loss_ce": 0.005919540300965309, + "loss_iou": 0.404296875, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 155957132, + "step": 1608 + }, + { + "epoch": 0.15731325772389518, + "grad_norm": 9.051845653869725, + "learning_rate": 5e-05, + "loss": 0.1068, + "num_input_tokens_seen": 156053896, + "step": 1609 + }, + { + "epoch": 0.15731325772389518, + "loss": 0.09257158637046814, + "loss_ce": 0.00870927982032299, + "loss_iou": 0.41015625, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 156053896, + "step": 1609 + }, + { + "epoch": 0.15741102854908096, + "grad_norm": 11.168956477077833, + "learning_rate": 5e-05, + "loss": 0.1154, + "num_input_tokens_seen": 156151220, + "step": 1610 + }, + { + "epoch": 0.15741102854908096, + "loss": 0.07205250859260559, + "loss_ce": 0.0056386240758001804, + "loss_iou": 0.412109375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 156151220, + "step": 1610 + }, + { + "epoch": 0.1575087993742667, + "grad_norm": 9.760189405339988, + "learning_rate": 5e-05, + "loss": 0.1124, + "num_input_tokens_seen": 156248828, + "step": 1611 + }, + { + "epoch": 0.1575087993742667, + "loss": 0.11831355839967728, + "loss_ce": 0.00414729630574584, + "loss_iou": 0.421875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 156248828, + "step": 1611 + }, + { + "epoch": 0.15760657019945248, + "grad_norm": 14.172031608121433, + "learning_rate": 5e-05, + "loss": 0.1238, + "num_input_tokens_seen": 156345820, + "step": 1612 + }, + { + "epoch": 0.15760657019945248, + "loss": 0.14145347476005554, + "loss_ce": 0.0038802321068942547, + "loss_iou": 0.306640625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 156345820, + "step": 1612 + }, + { + "epoch": 0.15770434102463826, + "grad_norm": 6.752306255845613, + "learning_rate": 5e-05, + "loss": 0.1242, + "num_input_tokens_seen": 156442380, + "step": 1613 + }, + { + "epoch": 0.15770434102463826, + "loss": 0.1453070044517517, + "loss_ce": 0.005704334005713463, + "loss_iou": 0.38671875, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 156442380, + "step": 1613 + }, + { + "epoch": 0.157802111849824, + "grad_norm": 11.314850850191517, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 156538720, + "step": 1614 + }, + { + "epoch": 0.157802111849824, + "loss": 0.060105688869953156, + "loss_ce": 0.001881959498859942, + "loss_iou": 0.287109375, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 156538720, + "step": 1614 + }, + { + "epoch": 0.1578998826750098, + "grad_norm": 18.925142495693176, + "learning_rate": 5e-05, + "loss": 0.1371, + "num_input_tokens_seen": 156636496, + "step": 1615 + }, + { + "epoch": 0.1578998826750098, + "loss": 0.13328507542610168, + "loss_ce": 0.0017237946158275008, + "loss_iou": 0.37109375, + "loss_num": 0.0262451171875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 156636496, + "step": 1615 + }, + { + "epoch": 0.15799765350019554, + "grad_norm": 15.544085927990228, + "learning_rate": 5e-05, + "loss": 0.1413, + "num_input_tokens_seen": 156733276, + "step": 1616 + }, + { + "epoch": 0.15799765350019554, + "loss": 0.13440218567848206, + "loss_ce": 0.007845795713365078, + "loss_iou": 0.2578125, + "loss_num": 0.0252685546875, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 156733276, + "step": 1616 + }, + { + "epoch": 0.15809542432538132, + "grad_norm": 11.616958213251285, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 156830004, + "step": 1617 + }, + { + "epoch": 0.15809542432538132, + "loss": 0.0971120223402977, + "loss_ce": 0.003957117907702923, + "loss_iou": 0.44140625, + "loss_num": 0.0185546875, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 156830004, + "step": 1617 + }, + { + "epoch": 0.15819319515056707, + "grad_norm": 8.927123791230564, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 156927288, + "step": 1618 + }, + { + "epoch": 0.15819319515056707, + "loss": 0.06136142089962959, + "loss_ce": 0.010488618165254593, + "loss_iou": 0.34765625, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 156927288, + "step": 1618 + }, + { + "epoch": 0.15829096597575285, + "grad_norm": 9.201113814444927, + "learning_rate": 5e-05, + "loss": 0.0952, + "num_input_tokens_seen": 157024212, + "step": 1619 + }, + { + "epoch": 0.15829096597575285, + "loss": 0.11434520781040192, + "loss_ce": 0.01465954352170229, + "loss_iou": 0.3203125, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 157024212, + "step": 1619 + }, + { + "epoch": 0.1583887368009386, + "grad_norm": 9.282615186255644, + "learning_rate": 5e-05, + "loss": 0.1111, + "num_input_tokens_seen": 157121356, + "step": 1620 + }, + { + "epoch": 0.1583887368009386, + "loss": 0.1472621113061905, + "loss_ce": 0.004302519373595715, + "loss_iou": 0.400390625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 157121356, + "step": 1620 + }, + { + "epoch": 0.15848650762612437, + "grad_norm": 25.80233738295134, + "learning_rate": 5e-05, + "loss": 0.1168, + "num_input_tokens_seen": 157218448, + "step": 1621 + }, + { + "epoch": 0.15848650762612437, + "loss": 0.12422919273376465, + "loss_ce": 0.004203559830784798, + "loss_iou": 0.4140625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 157218448, + "step": 1621 + }, + { + "epoch": 0.15858427845131012, + "grad_norm": 5.413259890354649, + "learning_rate": 5e-05, + "loss": 0.1011, + "num_input_tokens_seen": 157315248, + "step": 1622 + }, + { + "epoch": 0.15858427845131012, + "loss": 0.14392036199569702, + "loss_ce": 0.0025629373267292976, + "loss_iou": 0.546875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 157315248, + "step": 1622 + }, + { + "epoch": 0.1586820492764959, + "grad_norm": 14.335686281933942, + "learning_rate": 5e-05, + "loss": 0.1124, + "num_input_tokens_seen": 157412104, + "step": 1623 + }, + { + "epoch": 0.1586820492764959, + "loss": 0.12803582847118378, + "loss_ce": 0.0038292845711112022, + "loss_iou": 0.41796875, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 157412104, + "step": 1623 + }, + { + "epoch": 0.15877982010168165, + "grad_norm": 14.724834011419034, + "learning_rate": 5e-05, + "loss": 0.1162, + "num_input_tokens_seen": 157508952, + "step": 1624 + }, + { + "epoch": 0.15877982010168165, + "loss": 0.10934436321258545, + "loss_ce": 0.00532520329579711, + "loss_iou": 0.3671875, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 157508952, + "step": 1624 + }, + { + "epoch": 0.15887759092686743, + "grad_norm": 17.792601097334128, + "learning_rate": 5e-05, + "loss": 0.0989, + "num_input_tokens_seen": 157605408, + "step": 1625 + }, + { + "epoch": 0.15887759092686743, + "loss": 0.09880082309246063, + "loss_ce": 0.01011674478650093, + "loss_iou": 0.3828125, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 157605408, + "step": 1625 + }, + { + "epoch": 0.15897536175205318, + "grad_norm": 8.765825201559055, + "learning_rate": 5e-05, + "loss": 0.1058, + "num_input_tokens_seen": 157703308, + "step": 1626 + }, + { + "epoch": 0.15897536175205318, + "loss": 0.09865158796310425, + "loss_ce": 0.009784405119717121, + "loss_iou": 0.46484375, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 157703308, + "step": 1626 + }, + { + "epoch": 0.15907313257723896, + "grad_norm": 12.966115236719011, + "learning_rate": 5e-05, + "loss": 0.1101, + "num_input_tokens_seen": 157800012, + "step": 1627 + }, + { + "epoch": 0.15907313257723896, + "loss": 0.1300681084394455, + "loss_ce": 0.0049460334703326225, + "loss_iou": 0.36328125, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 157800012, + "step": 1627 + }, + { + "epoch": 0.1591709034024247, + "grad_norm": 4.849224476532213, + "learning_rate": 5e-05, + "loss": 0.1147, + "num_input_tokens_seen": 157897368, + "step": 1628 + }, + { + "epoch": 0.1591709034024247, + "loss": 0.16105830669403076, + "loss_ce": 0.010026806965470314, + "loss_iou": 0.310546875, + "loss_num": 0.0301513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 157897368, + "step": 1628 + }, + { + "epoch": 0.15926867422761048, + "grad_norm": 5.602915464082286, + "learning_rate": 5e-05, + "loss": 0.1138, + "num_input_tokens_seen": 157995392, + "step": 1629 + }, + { + "epoch": 0.15926867422761048, + "loss": 0.12047150731086731, + "loss_ce": 0.0059085143730044365, + "loss_iou": 0.3125, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 157995392, + "step": 1629 + }, + { + "epoch": 0.15936644505279624, + "grad_norm": 14.54524681045392, + "learning_rate": 5e-05, + "loss": 0.1125, + "num_input_tokens_seen": 158092032, + "step": 1630 + }, + { + "epoch": 0.15936644505279624, + "loss": 0.0906609445810318, + "loss_ce": 0.005837335251271725, + "loss_iou": 0.361328125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 158092032, + "step": 1630 + }, + { + "epoch": 0.159464215877982, + "grad_norm": 6.049361765968404, + "learning_rate": 5e-05, + "loss": 0.0899, + "num_input_tokens_seen": 158189260, + "step": 1631 + }, + { + "epoch": 0.159464215877982, + "loss": 0.06690888106822968, + "loss_ce": 0.003005069447681308, + "loss_iou": 0.3828125, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 158189260, + "step": 1631 + }, + { + "epoch": 0.15956198670316776, + "grad_norm": 7.520110239515953, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 158286552, + "step": 1632 + }, + { + "epoch": 0.15956198670316776, + "loss": 0.08889731764793396, + "loss_ce": 0.004195775371044874, + "loss_iou": 0.34765625, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 158286552, + "step": 1632 + }, + { + "epoch": 0.15965975752835354, + "grad_norm": 1.8436438852865589, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 158383136, + "step": 1633 + }, + { + "epoch": 0.15965975752835354, + "loss": 0.07735826075077057, + "loss_ce": 0.00204088375903666, + "loss_iou": 0.37109375, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 158383136, + "step": 1633 + }, + { + "epoch": 0.1597575283535393, + "grad_norm": 8.488940096323118, + "learning_rate": 5e-05, + "loss": 0.1419, + "num_input_tokens_seen": 158479168, + "step": 1634 + }, + { + "epoch": 0.1597575283535393, + "loss": 0.12827768921852112, + "loss_ce": 0.008526715449988842, + "loss_iou": 0.228515625, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 158479168, + "step": 1634 + }, + { + "epoch": 0.15985529917872507, + "grad_norm": 11.670474248146336, + "learning_rate": 5e-05, + "loss": 0.1177, + "num_input_tokens_seen": 158576140, + "step": 1635 + }, + { + "epoch": 0.15985529917872507, + "loss": 0.1943081021308899, + "loss_ce": 0.008272942155599594, + "loss_iou": 0.2060546875, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 158576140, + "step": 1635 + }, + { + "epoch": 0.15995307000391085, + "grad_norm": 15.006572363541752, + "learning_rate": 5e-05, + "loss": 0.1646, + "num_input_tokens_seen": 158674024, + "step": 1636 + }, + { + "epoch": 0.15995307000391085, + "loss": 0.15379127860069275, + "loss_ce": 0.004255138337612152, + "loss_iou": 0.36328125, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 158674024, + "step": 1636 + }, + { + "epoch": 0.1600508408290966, + "grad_norm": 19.51320610919609, + "learning_rate": 5e-05, + "loss": 0.1233, + "num_input_tokens_seen": 158771292, + "step": 1637 + }, + { + "epoch": 0.1600508408290966, + "loss": 0.15519791841506958, + "loss_ce": 0.0057838596403598785, + "loss_iou": 0.33203125, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 158771292, + "step": 1637 + }, + { + "epoch": 0.16014861165428237, + "grad_norm": 16.404975915270594, + "learning_rate": 5e-05, + "loss": 0.1115, + "num_input_tokens_seen": 158868308, + "step": 1638 + }, + { + "epoch": 0.16014861165428237, + "loss": 0.13174393773078918, + "loss_ce": 0.009490528143942356, + "loss_iou": 0.484375, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 158868308, + "step": 1638 + }, + { + "epoch": 0.16024638247946812, + "grad_norm": 13.224803684227057, + "learning_rate": 5e-05, + "loss": 0.0919, + "num_input_tokens_seen": 158964964, + "step": 1639 + }, + { + "epoch": 0.16024638247946812, + "loss": 0.10929707437753677, + "loss_ce": 0.008589064702391624, + "loss_iou": 0.298828125, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 158964964, + "step": 1639 + }, + { + "epoch": 0.1603441533046539, + "grad_norm": 8.912380999584087, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 159062468, + "step": 1640 + }, + { + "epoch": 0.1603441533046539, + "loss": 0.09240064769983292, + "loss_ce": 0.004128551110625267, + "loss_iou": 0.349609375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 159062468, + "step": 1640 + }, + { + "epoch": 0.16044192412983965, + "grad_norm": 15.269373869903186, + "learning_rate": 5e-05, + "loss": 0.1133, + "num_input_tokens_seen": 159159088, + "step": 1641 + }, + { + "epoch": 0.16044192412983965, + "loss": 0.09021183103322983, + "loss_ce": 0.003969154320657253, + "loss_iou": 0.388671875, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 159159088, + "step": 1641 + }, + { + "epoch": 0.16053969495502543, + "grad_norm": 20.825229149690102, + "learning_rate": 5e-05, + "loss": 0.1071, + "num_input_tokens_seen": 159256256, + "step": 1642 + }, + { + "epoch": 0.16053969495502543, + "loss": 0.1225576102733612, + "loss_ce": 0.009367911145091057, + "loss_iou": 0.470703125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 159256256, + "step": 1642 + }, + { + "epoch": 0.16063746578021118, + "grad_norm": 11.786452772494762, + "learning_rate": 5e-05, + "loss": 0.1149, + "num_input_tokens_seen": 159353192, + "step": 1643 + }, + { + "epoch": 0.16063746578021118, + "loss": 0.13998310267925262, + "loss_ce": 0.012038152664899826, + "loss_iou": 0.28125, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 159353192, + "step": 1643 + }, + { + "epoch": 0.16073523660539696, + "grad_norm": 4.048237087548323, + "learning_rate": 5e-05, + "loss": 0.1315, + "num_input_tokens_seen": 159450220, + "step": 1644 + }, + { + "epoch": 0.16073523660539696, + "loss": 0.13486427068710327, + "loss_ce": 0.007537307683378458, + "loss_iou": 0.3359375, + "loss_num": 0.0255126953125, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 159450220, + "step": 1644 + }, + { + "epoch": 0.1608330074305827, + "grad_norm": 10.337912013426255, + "learning_rate": 5e-05, + "loss": 0.1144, + "num_input_tokens_seen": 159547444, + "step": 1645 + }, + { + "epoch": 0.1608330074305827, + "loss": 0.12392272055149078, + "loss_ce": 0.002645860193297267, + "loss_iou": 0.341796875, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 159547444, + "step": 1645 + }, + { + "epoch": 0.1609307782557685, + "grad_norm": 7.718678868429937, + "learning_rate": 5e-05, + "loss": 0.1172, + "num_input_tokens_seen": 159644716, + "step": 1646 + }, + { + "epoch": 0.1609307782557685, + "loss": 0.11873093992471695, + "loss_ce": 0.007524882443249226, + "loss_iou": 0.384765625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 159644716, + "step": 1646 + }, + { + "epoch": 0.16102854908095424, + "grad_norm": 4.8961901332834845, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 159741368, + "step": 1647 + }, + { + "epoch": 0.16102854908095424, + "loss": 0.07742815464735031, + "loss_ce": 0.005498223006725311, + "loss_iou": 0.451171875, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 159741368, + "step": 1647 + }, + { + "epoch": 0.16112631990614001, + "grad_norm": 19.638569971164436, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 159838036, + "step": 1648 + }, + { + "epoch": 0.16112631990614001, + "loss": 0.08406852185726166, + "loss_ce": 0.004341353662312031, + "loss_iou": 0.3671875, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 159838036, + "step": 1648 + }, + { + "epoch": 0.16122409073132576, + "grad_norm": 18.26653992009641, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 159935868, + "step": 1649 + }, + { + "epoch": 0.16122409073132576, + "loss": 0.08518648147583008, + "loss_ce": 0.004925249144434929, + "loss_iou": 0.40625, + "loss_num": 0.01611328125, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 159935868, + "step": 1649 + }, + { + "epoch": 0.16132186155651154, + "grad_norm": 16.2743066743434, + "learning_rate": 5e-05, + "loss": 0.1189, + "num_input_tokens_seen": 160032436, + "step": 1650 + }, + { + "epoch": 0.16132186155651154, + "loss": 0.12318238615989685, + "loss_ce": 0.008070083335042, + "loss_iou": 0.1875, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 160032436, + "step": 1650 + }, + { + "epoch": 0.1614196323816973, + "grad_norm": 14.072805977627294, + "learning_rate": 5e-05, + "loss": 0.19, + "num_input_tokens_seen": 160129616, + "step": 1651 + }, + { + "epoch": 0.1614196323816973, + "loss": 0.1741575002670288, + "loss_ce": 0.008050321601331234, + "loss_iou": 0.31640625, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 160129616, + "step": 1651 + }, + { + "epoch": 0.16151740320688307, + "grad_norm": 8.312959870919734, + "learning_rate": 5e-05, + "loss": 0.0916, + "num_input_tokens_seen": 160225584, + "step": 1652 + }, + { + "epoch": 0.16151740320688307, + "loss": 0.12792761623859406, + "loss_ce": 0.0046671126037836075, + "loss_iou": 0.28125, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 160225584, + "step": 1652 + }, + { + "epoch": 0.16161517403206882, + "grad_norm": 2.9378725157665353, + "learning_rate": 5e-05, + "loss": 0.0972, + "num_input_tokens_seen": 160321664, + "step": 1653 + }, + { + "epoch": 0.16161517403206882, + "loss": 0.12103414535522461, + "loss_ce": 0.007859708741307259, + "loss_iou": 0.326171875, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 160321664, + "step": 1653 + }, + { + "epoch": 0.1617129448572546, + "grad_norm": 10.340826397574295, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 160418452, + "step": 1654 + }, + { + "epoch": 0.1617129448572546, + "loss": 0.09881606698036194, + "loss_ce": 0.006897125393152237, + "loss_iou": 0.37109375, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 160418452, + "step": 1654 + }, + { + "epoch": 0.16181071568244035, + "grad_norm": 5.966750364028291, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 160515376, + "step": 1655 + }, + { + "epoch": 0.16181071568244035, + "loss": 0.10368486493825912, + "loss_ce": 0.005189383868128061, + "loss_iou": 0.302734375, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 160515376, + "step": 1655 + }, + { + "epoch": 0.16190848650762613, + "grad_norm": 9.74687577425865, + "learning_rate": 5e-05, + "loss": 0.0695, + "num_input_tokens_seen": 160612776, + "step": 1656 + }, + { + "epoch": 0.16190848650762613, + "loss": 0.0863432064652443, + "loss_ce": 0.0036863484419882298, + "loss_iou": 0.4140625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 160612776, + "step": 1656 + }, + { + "epoch": 0.16200625733281188, + "grad_norm": 8.701835361448742, + "learning_rate": 5e-05, + "loss": 0.1108, + "num_input_tokens_seen": 160709292, + "step": 1657 + }, + { + "epoch": 0.16200625733281188, + "loss": 0.12083069980144501, + "loss_ce": 0.008159802295267582, + "loss_iou": 0.431640625, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 160709292, + "step": 1657 + }, + { + "epoch": 0.16210402815799765, + "grad_norm": 7.989561972564174, + "learning_rate": 5e-05, + "loss": 0.1289, + "num_input_tokens_seen": 160806472, + "step": 1658 + }, + { + "epoch": 0.16210402815799765, + "loss": 0.10500246286392212, + "loss_ce": 0.00590425543487072, + "loss_iou": 0.318359375, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 160806472, + "step": 1658 + }, + { + "epoch": 0.16220179898318343, + "grad_norm": 8.649059656980608, + "learning_rate": 5e-05, + "loss": 0.1447, + "num_input_tokens_seen": 160904012, + "step": 1659 + }, + { + "epoch": 0.16220179898318343, + "loss": 0.17633624374866486, + "loss_ce": 0.01010699663311243, + "loss_iou": 0.482421875, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 160904012, + "step": 1659 + }, + { + "epoch": 0.16229956980836918, + "grad_norm": 6.015143266126121, + "learning_rate": 5e-05, + "loss": 0.0934, + "num_input_tokens_seen": 161000432, + "step": 1660 + }, + { + "epoch": 0.16229956980836918, + "loss": 0.1054137796163559, + "loss_ce": 0.0058959610760211945, + "loss_iou": 0.2421875, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 161000432, + "step": 1660 + }, + { + "epoch": 0.16239734063355496, + "grad_norm": 3.094402263795015, + "learning_rate": 5e-05, + "loss": 0.0922, + "num_input_tokens_seen": 161096764, + "step": 1661 + }, + { + "epoch": 0.16239734063355496, + "loss": 0.09569861739873886, + "loss_ce": 0.0033219093456864357, + "loss_iou": 0.251953125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 161096764, + "step": 1661 + }, + { + "epoch": 0.1624951114587407, + "grad_norm": 9.396777278416204, + "learning_rate": 5e-05, + "loss": 0.1008, + "num_input_tokens_seen": 161193592, + "step": 1662 + }, + { + "epoch": 0.1624951114587407, + "loss": 0.10051875561475754, + "loss_ce": 0.006051594391465187, + "loss_iou": 0.3359375, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 161193592, + "step": 1662 + }, + { + "epoch": 0.1625928822839265, + "grad_norm": 16.845402125311168, + "learning_rate": 5e-05, + "loss": 0.109, + "num_input_tokens_seen": 161290488, + "step": 1663 + }, + { + "epoch": 0.1625928822839265, + "loss": 0.12320297956466675, + "loss_ce": 0.004398050252348185, + "loss_iou": 0.373046875, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 161290488, + "step": 1663 + }, + { + "epoch": 0.16269065310911224, + "grad_norm": 75.87770194526095, + "learning_rate": 5e-05, + "loss": 0.145, + "num_input_tokens_seen": 161388004, + "step": 1664 + }, + { + "epoch": 0.16269065310911224, + "loss": 0.11038167029619217, + "loss_ce": 0.005279130302369595, + "loss_iou": 0.4453125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 161388004, + "step": 1664 + }, + { + "epoch": 0.16278842393429802, + "grad_norm": 6.567399193710856, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 161484948, + "step": 1665 + }, + { + "epoch": 0.16278842393429802, + "loss": 0.09695568680763245, + "loss_ce": 0.0034193117171525955, + "loss_iou": 0.43359375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 161484948, + "step": 1665 + }, + { + "epoch": 0.16288619475948377, + "grad_norm": 16.672735971107073, + "learning_rate": 5e-05, + "loss": 0.1669, + "num_input_tokens_seen": 161581028, + "step": 1666 + }, + { + "epoch": 0.16288619475948377, + "loss": 0.15189921855926514, + "loss_ce": 0.0049875956028699875, + "loss_iou": 0.408203125, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 161581028, + "step": 1666 + }, + { + "epoch": 0.16298396558466954, + "grad_norm": 6.08922062597667, + "learning_rate": 5e-05, + "loss": 0.1056, + "num_input_tokens_seen": 161678168, + "step": 1667 + }, + { + "epoch": 0.16298396558466954, + "loss": 0.15904514491558075, + "loss_ce": 0.0067929476499557495, + "loss_iou": 0.396484375, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 161678168, + "step": 1667 + }, + { + "epoch": 0.1630817364098553, + "grad_norm": 9.480392346051978, + "learning_rate": 5e-05, + "loss": 0.1138, + "num_input_tokens_seen": 161775764, + "step": 1668 + }, + { + "epoch": 0.1630817364098553, + "loss": 0.16673673689365387, + "loss_ce": 0.004261154215782881, + "loss_iou": 0.3671875, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 161775764, + "step": 1668 + }, + { + "epoch": 0.16317950723504107, + "grad_norm": 13.56983257814224, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 161872896, + "step": 1669 + }, + { + "epoch": 0.16317950723504107, + "loss": 0.07976465672254562, + "loss_ce": 0.004485422745347023, + "loss_iou": 0.462890625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 161872896, + "step": 1669 + }, + { + "epoch": 0.16327727806022682, + "grad_norm": 3.2087973400661385, + "learning_rate": 5e-05, + "loss": 0.0969, + "num_input_tokens_seen": 161969668, + "step": 1670 + }, + { + "epoch": 0.16327727806022682, + "loss": 0.1285891979932785, + "loss_ce": 0.008899259380996227, + "loss_iou": 0.33203125, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 161969668, + "step": 1670 + }, + { + "epoch": 0.1633750488854126, + "grad_norm": 8.618671344542735, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 162067172, + "step": 1671 + }, + { + "epoch": 0.1633750488854126, + "loss": 0.04828784614801407, + "loss_ce": 0.007134896237403154, + "loss_iou": 0.326171875, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 162067172, + "step": 1671 + }, + { + "epoch": 0.16347281971059835, + "grad_norm": 3.1967027368701246, + "learning_rate": 5e-05, + "loss": 0.0854, + "num_input_tokens_seen": 162164528, + "step": 1672 + }, + { + "epoch": 0.16347281971059835, + "loss": 0.06303104013204575, + "loss_ce": 0.0034149521961808205, + "loss_iou": 0.4609375, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 162164528, + "step": 1672 + }, + { + "epoch": 0.16357059053578413, + "grad_norm": 18.117021536791167, + "learning_rate": 5e-05, + "loss": 0.158, + "num_input_tokens_seen": 162260980, + "step": 1673 + }, + { + "epoch": 0.16357059053578413, + "loss": 0.16644197702407837, + "loss_ce": 0.0059805503115057945, + "loss_iou": 0.349609375, + "loss_num": 0.0322265625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 162260980, + "step": 1673 + }, + { + "epoch": 0.16366836136096988, + "grad_norm": 8.04111657387404, + "learning_rate": 5e-05, + "loss": 0.1357, + "num_input_tokens_seen": 162357620, + "step": 1674 + }, + { + "epoch": 0.16366836136096988, + "loss": 0.12520834803581238, + "loss_ce": 0.009096590802073479, + "loss_iou": 0.11328125, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 162357620, + "step": 1674 + }, + { + "epoch": 0.16376613218615566, + "grad_norm": 4.36158428558856, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 162455008, + "step": 1675 + }, + { + "epoch": 0.16376613218615566, + "loss": 0.06927098333835602, + "loss_ce": 0.007245431654155254, + "loss_iou": 0.390625, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 162455008, + "step": 1675 + }, + { + "epoch": 0.1638639030113414, + "grad_norm": 9.572054543466402, + "learning_rate": 5e-05, + "loss": 0.1059, + "num_input_tokens_seen": 162552172, + "step": 1676 + }, + { + "epoch": 0.1638639030113414, + "loss": 0.11969142407178879, + "loss_ce": 0.0076919132843613625, + "loss_iou": 0.283203125, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 162552172, + "step": 1676 + }, + { + "epoch": 0.16396167383652718, + "grad_norm": 6.433079435833001, + "learning_rate": 5e-05, + "loss": 0.0969, + "num_input_tokens_seen": 162649352, + "step": 1677 + }, + { + "epoch": 0.16396167383652718, + "loss": 0.09130752831697464, + "loss_ce": 0.003958586603403091, + "loss_iou": 0.2255859375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 162649352, + "step": 1677 + }, + { + "epoch": 0.16405944466171293, + "grad_norm": 4.3048161732650225, + "learning_rate": 5e-05, + "loss": 0.1024, + "num_input_tokens_seen": 162746328, + "step": 1678 + }, + { + "epoch": 0.16405944466171293, + "loss": 0.12813568115234375, + "loss_ce": 0.007957464084029198, + "loss_iou": 0.349609375, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 162746328, + "step": 1678 + }, + { + "epoch": 0.1641572154868987, + "grad_norm": 7.88327042142571, + "learning_rate": 5e-05, + "loss": 0.1029, + "num_input_tokens_seen": 162843492, + "step": 1679 + }, + { + "epoch": 0.1641572154868987, + "loss": 0.06756413727998734, + "loss_ce": 0.005018359515815973, + "loss_iou": 0.2373046875, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 162843492, + "step": 1679 + }, + { + "epoch": 0.16425498631208446, + "grad_norm": 15.89050812905673, + "learning_rate": 5e-05, + "loss": 0.1034, + "num_input_tokens_seen": 162940108, + "step": 1680 + }, + { + "epoch": 0.16425498631208446, + "loss": 0.0939483791589737, + "loss_ce": 0.005783098749816418, + "loss_iou": 0.373046875, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 162940108, + "step": 1680 + }, + { + "epoch": 0.16435275713727024, + "grad_norm": 8.014985855899837, + "learning_rate": 5e-05, + "loss": 0.1021, + "num_input_tokens_seen": 163036324, + "step": 1681 + }, + { + "epoch": 0.16435275713727024, + "loss": 0.06264874339103699, + "loss_ce": 0.01014324463903904, + "loss_iou": 0.37890625, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 163036324, + "step": 1681 + }, + { + "epoch": 0.16445052796245602, + "grad_norm": 24.303000927224485, + "learning_rate": 5e-05, + "loss": 0.166, + "num_input_tokens_seen": 163132952, + "step": 1682 + }, + { + "epoch": 0.16445052796245602, + "loss": 0.2077922224998474, + "loss_ce": 0.015104217454791069, + "loss_iou": 0.361328125, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 163132952, + "step": 1682 + }, + { + "epoch": 0.16454829878764177, + "grad_norm": 12.948888669187435, + "learning_rate": 5e-05, + "loss": 0.0644, + "num_input_tokens_seen": 163229312, + "step": 1683 + }, + { + "epoch": 0.16454829878764177, + "loss": 0.07227383553981781, + "loss_ce": 0.007797824684530497, + "loss_iou": 0.291015625, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 163229312, + "step": 1683 + }, + { + "epoch": 0.16464606961282754, + "grad_norm": 21.360010466707053, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 163325136, + "step": 1684 + }, + { + "epoch": 0.16464606961282754, + "loss": 0.10795803368091583, + "loss_ce": 0.004396640695631504, + "loss_iou": 0.38671875, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 163325136, + "step": 1684 + }, + { + "epoch": 0.1647438404380133, + "grad_norm": 5.218514773166236, + "learning_rate": 5e-05, + "loss": 0.1083, + "num_input_tokens_seen": 163421304, + "step": 1685 + }, + { + "epoch": 0.1647438404380133, + "loss": 0.0908273383975029, + "loss_ce": 0.007186284288764, + "loss_iou": 0.32421875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 163421304, + "step": 1685 + }, + { + "epoch": 0.16484161126319907, + "grad_norm": 11.763394897690695, + "learning_rate": 5e-05, + "loss": 0.1294, + "num_input_tokens_seen": 163518260, + "step": 1686 + }, + { + "epoch": 0.16484161126319907, + "loss": 0.15524934232234955, + "loss_ce": 0.005850538611412048, + "loss_iou": 0.30078125, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 163518260, + "step": 1686 + }, + { + "epoch": 0.16493938208838482, + "grad_norm": 10.606784331015477, + "learning_rate": 5e-05, + "loss": 0.1225, + "num_input_tokens_seen": 163614156, + "step": 1687 + }, + { + "epoch": 0.16493938208838482, + "loss": 0.13123854994773865, + "loss_ce": 0.007550803478807211, + "loss_iou": 0.341796875, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 163614156, + "step": 1687 + }, + { + "epoch": 0.1650371529135706, + "grad_norm": 4.838220917685478, + "learning_rate": 5e-05, + "loss": 0.1202, + "num_input_tokens_seen": 163711152, + "step": 1688 + }, + { + "epoch": 0.1650371529135706, + "loss": 0.125227153301239, + "loss_ce": 0.005888155661523342, + "loss_iou": 0.396484375, + "loss_num": 0.02392578125, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 163711152, + "step": 1688 + }, + { + "epoch": 0.16513492373875635, + "grad_norm": 7.309230234091496, + "learning_rate": 5e-05, + "loss": 0.108, + "num_input_tokens_seen": 163808540, + "step": 1689 + }, + { + "epoch": 0.16513492373875635, + "loss": 0.11218300461769104, + "loss_ce": 0.00894203782081604, + "loss_iou": 0.392578125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 163808540, + "step": 1689 + }, + { + "epoch": 0.16523269456394213, + "grad_norm": 2.0604552453794063, + "learning_rate": 5e-05, + "loss": 0.0942, + "num_input_tokens_seen": 163905352, + "step": 1690 + }, + { + "epoch": 0.16523269456394213, + "loss": 0.08357702195644379, + "loss_ce": 0.007374634966254234, + "loss_iou": 0.40234375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 163905352, + "step": 1690 + }, + { + "epoch": 0.16533046538912788, + "grad_norm": 5.014827955963583, + "learning_rate": 5e-05, + "loss": 0.0889, + "num_input_tokens_seen": 164002072, + "step": 1691 + }, + { + "epoch": 0.16533046538912788, + "loss": 0.07465691864490509, + "loss_ce": 0.004081205930560827, + "loss_iou": 0.3359375, + "loss_num": 0.01409912109375, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 164002072, + "step": 1691 + }, + { + "epoch": 0.16542823621431366, + "grad_norm": 13.864702964549362, + "learning_rate": 5e-05, + "loss": 0.0745, + "num_input_tokens_seen": 164099816, + "step": 1692 + }, + { + "epoch": 0.16542823621431366, + "loss": 0.08956176042556763, + "loss_ce": 0.0017932000337168574, + "loss_iou": 0.40625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 164099816, + "step": 1692 + }, + { + "epoch": 0.1655260070394994, + "grad_norm": 12.708561791909684, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 164196600, + "step": 1693 + }, + { + "epoch": 0.1655260070394994, + "loss": 0.0694277435541153, + "loss_ce": 0.0062182131223380566, + "loss_iou": 0.40625, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 164196600, + "step": 1693 + }, + { + "epoch": 0.16562377786468518, + "grad_norm": 14.703417090737279, + "learning_rate": 5e-05, + "loss": 0.0929, + "num_input_tokens_seen": 164293508, + "step": 1694 + }, + { + "epoch": 0.16562377786468518, + "loss": 0.10085733979940414, + "loss_ce": 0.0039182500913739204, + "loss_iou": 0.3984375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 164293508, + "step": 1694 + }, + { + "epoch": 0.16572154868987093, + "grad_norm": 17.033242759314938, + "learning_rate": 5e-05, + "loss": 0.0863, + "num_input_tokens_seen": 164391740, + "step": 1695 + }, + { + "epoch": 0.16572154868987093, + "loss": 0.09800899773836136, + "loss_ce": 0.007478602696210146, + "loss_iou": 0.326171875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 164391740, + "step": 1695 + }, + { + "epoch": 0.1658193195150567, + "grad_norm": 5.697270999971493, + "learning_rate": 5e-05, + "loss": 0.1104, + "num_input_tokens_seen": 164488132, + "step": 1696 + }, + { + "epoch": 0.1658193195150567, + "loss": 0.09765883535146713, + "loss_ce": 0.005205829162150621, + "loss_iou": 0.3671875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 164488132, + "step": 1696 + }, + { + "epoch": 0.16591709034024246, + "grad_norm": 31.564557982252033, + "learning_rate": 5e-05, + "loss": 0.1497, + "num_input_tokens_seen": 164586272, + "step": 1697 + }, + { + "epoch": 0.16591709034024246, + "loss": 0.15016230940818787, + "loss_ce": 0.008408166468143463, + "loss_iou": 0.388671875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 164586272, + "step": 1697 + }, + { + "epoch": 0.16601486116542824, + "grad_norm": 56.29791235876688, + "learning_rate": 5e-05, + "loss": 0.1195, + "num_input_tokens_seen": 164683108, + "step": 1698 + }, + { + "epoch": 0.16601486116542824, + "loss": 0.10699740052223206, + "loss_ce": 0.006136804819107056, + "loss_iou": 0.376953125, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 164683108, + "step": 1698 + }, + { + "epoch": 0.166112631990614, + "grad_norm": 8.787285045906886, + "learning_rate": 5e-05, + "loss": 0.1024, + "num_input_tokens_seen": 164780624, + "step": 1699 + }, + { + "epoch": 0.166112631990614, + "loss": 0.11319492757320404, + "loss_ce": 0.005803565960377455, + "loss_iou": 0.48046875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 164780624, + "step": 1699 + }, + { + "epoch": 0.16621040281579977, + "grad_norm": 2.9765997319831694, + "learning_rate": 5e-05, + "loss": 0.0742, + "num_input_tokens_seen": 164877932, + "step": 1700 + }, + { + "epoch": 0.16621040281579977, + "loss": 0.08010925352573395, + "loss_ce": 0.0053640687838196754, + "loss_iou": 0.3203125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 164877932, + "step": 1700 + }, + { + "epoch": 0.16630817364098552, + "grad_norm": 2.8156070184500823, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 164974856, + "step": 1701 + }, + { + "epoch": 0.16630817364098552, + "loss": 0.10563354194164276, + "loss_ce": 0.009060666896402836, + "loss_iou": 0.3671875, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 164974856, + "step": 1701 + }, + { + "epoch": 0.1664059444661713, + "grad_norm": 6.280002831743068, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 165071992, + "step": 1702 + }, + { + "epoch": 0.1664059444661713, + "loss": 0.09243647754192352, + "loss_ce": 0.005583452992141247, + "loss_iou": 0.34765625, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 165071992, + "step": 1702 + }, + { + "epoch": 0.16650371529135705, + "grad_norm": 5.423254799925136, + "learning_rate": 5e-05, + "loss": 0.1086, + "num_input_tokens_seen": 165168756, + "step": 1703 + }, + { + "epoch": 0.16650371529135705, + "loss": 0.14331525564193726, + "loss_ce": 0.009983963333070278, + "loss_iou": 0.41015625, + "loss_num": 0.026611328125, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 165168756, + "step": 1703 + }, + { + "epoch": 0.16660148611654282, + "grad_norm": 4.925975552899353, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 165265928, + "step": 1704 + }, + { + "epoch": 0.16660148611654282, + "loss": 0.0749635100364685, + "loss_ce": 0.004902778193354607, + "loss_iou": 0.3125, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 165265928, + "step": 1704 + }, + { + "epoch": 0.1666992569417286, + "grad_norm": 5.244315945942739, + "learning_rate": 5e-05, + "loss": 0.0873, + "num_input_tokens_seen": 165362696, + "step": 1705 + }, + { + "epoch": 0.1666992569417286, + "loss": 0.09137747436761856, + "loss_ce": 0.006141874939203262, + "loss_iou": 0.35546875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 165362696, + "step": 1705 + }, + { + "epoch": 0.16679702776691435, + "grad_norm": 5.560203463459688, + "learning_rate": 5e-05, + "loss": 0.088, + "num_input_tokens_seen": 165458484, + "step": 1706 + }, + { + "epoch": 0.16679702776691435, + "loss": 0.07923118025064468, + "loss_ce": 0.0036391387693583965, + "loss_iou": 0.3203125, + "loss_num": 0.01507568359375, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 165458484, + "step": 1706 + }, + { + "epoch": 0.16689479859210013, + "grad_norm": 5.811292297681074, + "learning_rate": 5e-05, + "loss": 0.1079, + "num_input_tokens_seen": 165555568, + "step": 1707 + }, + { + "epoch": 0.16689479859210013, + "loss": 0.09645232558250427, + "loss_ce": 0.005845637992024422, + "loss_iou": 0.279296875, + "loss_num": 0.01806640625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 165555568, + "step": 1707 + }, + { + "epoch": 0.16699256941728588, + "grad_norm": 10.029540360061729, + "learning_rate": 5e-05, + "loss": 0.1323, + "num_input_tokens_seen": 165653124, + "step": 1708 + }, + { + "epoch": 0.16699256941728588, + "loss": 0.1096087098121643, + "loss_ce": 0.0042849211022257805, + "loss_iou": 0.37890625, + "loss_num": 0.02099609375, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 165653124, + "step": 1708 + }, + { + "epoch": 0.16709034024247166, + "grad_norm": 8.813109661709547, + "learning_rate": 5e-05, + "loss": 0.1431, + "num_input_tokens_seen": 165750168, + "step": 1709 + }, + { + "epoch": 0.16709034024247166, + "loss": 0.16854041814804077, + "loss_ce": 0.00937599129974842, + "loss_iou": 0.37890625, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 165750168, + "step": 1709 + }, + { + "epoch": 0.1671881110676574, + "grad_norm": 8.064193429493395, + "learning_rate": 5e-05, + "loss": 0.1153, + "num_input_tokens_seen": 165846840, + "step": 1710 + }, + { + "epoch": 0.1671881110676574, + "loss": 0.11667168140411377, + "loss_ce": 0.01295769214630127, + "loss_iou": 0.33203125, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 165846840, + "step": 1710 + }, + { + "epoch": 0.16728588189284319, + "grad_norm": 8.635996732839347, + "learning_rate": 5e-05, + "loss": 0.0933, + "num_input_tokens_seen": 165943056, + "step": 1711 + }, + { + "epoch": 0.16728588189284319, + "loss": 0.13036365807056427, + "loss_ce": 0.007621956989169121, + "loss_iou": 0.376953125, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 165943056, + "step": 1711 + }, + { + "epoch": 0.16738365271802894, + "grad_norm": 3.68146754429202, + "learning_rate": 5e-05, + "loss": 0.0865, + "num_input_tokens_seen": 166040300, + "step": 1712 + }, + { + "epoch": 0.16738365271802894, + "loss": 0.11166608333587646, + "loss_ce": 0.006227854639291763, + "loss_iou": 0.390625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 166040300, + "step": 1712 + }, + { + "epoch": 0.1674814235432147, + "grad_norm": 11.275956228705864, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 166137328, + "step": 1713 + }, + { + "epoch": 0.1674814235432147, + "loss": 0.05342957749962807, + "loss_ce": 0.0034570423886179924, + "loss_iou": 0.353515625, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 166137328, + "step": 1713 + }, + { + "epoch": 0.16757919436840046, + "grad_norm": 6.039450392554732, + "learning_rate": 5e-05, + "loss": 0.0879, + "num_input_tokens_seen": 166234384, + "step": 1714 + }, + { + "epoch": 0.16757919436840046, + "loss": 0.1098552718758583, + "loss_ce": 0.00514946598559618, + "loss_iou": 0.306640625, + "loss_num": 0.02099609375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 166234384, + "step": 1714 + }, + { + "epoch": 0.16767696519358624, + "grad_norm": 8.048036007857737, + "learning_rate": 5e-05, + "loss": 0.1382, + "num_input_tokens_seen": 166330940, + "step": 1715 + }, + { + "epoch": 0.16767696519358624, + "loss": 0.11266642063856125, + "loss_ce": 0.003016761038452387, + "loss_iou": 0.3125, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 166330940, + "step": 1715 + }, + { + "epoch": 0.167774736018772, + "grad_norm": 12.560538899870588, + "learning_rate": 5e-05, + "loss": 0.0914, + "num_input_tokens_seen": 166428688, + "step": 1716 + }, + { + "epoch": 0.167774736018772, + "loss": 0.09624377638101578, + "loss_ce": 0.006977952551096678, + "loss_iou": 0.34375, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 166428688, + "step": 1716 + }, + { + "epoch": 0.16787250684395777, + "grad_norm": 6.151039917207586, + "learning_rate": 5e-05, + "loss": 0.1183, + "num_input_tokens_seen": 166524640, + "step": 1717 + }, + { + "epoch": 0.16787250684395777, + "loss": 0.13325849175453186, + "loss_ce": 0.00990644283592701, + "loss_iou": 0.220703125, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 166524640, + "step": 1717 + }, + { + "epoch": 0.16797027766914352, + "grad_norm": 6.818045187288642, + "learning_rate": 5e-05, + "loss": 0.0772, + "num_input_tokens_seen": 166621640, + "step": 1718 + }, + { + "epoch": 0.16797027766914352, + "loss": 0.04745561629533768, + "loss_ce": 0.004051991738379002, + "loss_iou": 0.2333984375, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 166621640, + "step": 1718 + }, + { + "epoch": 0.1680680484943293, + "grad_norm": 7.623123018260542, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 166718100, + "step": 1719 + }, + { + "epoch": 0.1680680484943293, + "loss": 0.09603668749332428, + "loss_ce": 0.003370065474882722, + "loss_iou": 0.458984375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 166718100, + "step": 1719 + }, + { + "epoch": 0.16816581931951505, + "grad_norm": 6.217694657035203, + "learning_rate": 5e-05, + "loss": 0.1034, + "num_input_tokens_seen": 166815436, + "step": 1720 + }, + { + "epoch": 0.16816581931951505, + "loss": 0.10971363633871078, + "loss_ce": 0.009600725024938583, + "loss_iou": 0.271484375, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 166815436, + "step": 1720 + }, + { + "epoch": 0.16826359014470083, + "grad_norm": 13.737381726472833, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 166912456, + "step": 1721 + }, + { + "epoch": 0.16826359014470083, + "loss": 0.06760872155427933, + "loss_ce": 0.0036896527744829655, + "loss_iou": 0.21875, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 166912456, + "step": 1721 + }, + { + "epoch": 0.16836136096988658, + "grad_norm": 33.4522217078022, + "learning_rate": 5e-05, + "loss": 0.094, + "num_input_tokens_seen": 167008856, + "step": 1722 + }, + { + "epoch": 0.16836136096988658, + "loss": 0.0985453799366951, + "loss_ce": 0.006656948011368513, + "loss_iou": 0.34765625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 167008856, + "step": 1722 + }, + { + "epoch": 0.16845913179507235, + "grad_norm": 9.945484204111835, + "learning_rate": 5e-05, + "loss": 0.1139, + "num_input_tokens_seen": 167105972, + "step": 1723 + }, + { + "epoch": 0.16845913179507235, + "loss": 0.12241629511117935, + "loss_ce": 0.0022533307783305645, + "loss_iou": 0.28515625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 167105972, + "step": 1723 + }, + { + "epoch": 0.1685569026202581, + "grad_norm": 35.00380904216881, + "learning_rate": 5e-05, + "loss": 0.1406, + "num_input_tokens_seen": 167202680, + "step": 1724 + }, + { + "epoch": 0.1685569026202581, + "loss": 0.1619114875793457, + "loss_ce": 0.01390122901648283, + "loss_iou": 0.41796875, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 167202680, + "step": 1724 + }, + { + "epoch": 0.16865467344544388, + "grad_norm": 23.136689650175715, + "learning_rate": 5e-05, + "loss": 0.1477, + "num_input_tokens_seen": 167299516, + "step": 1725 + }, + { + "epoch": 0.16865467344544388, + "loss": 0.16226285696029663, + "loss_ce": 0.008515294641256332, + "loss_iou": 0.376953125, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 167299516, + "step": 1725 + }, + { + "epoch": 0.16875244427062963, + "grad_norm": 13.638617474900359, + "learning_rate": 5e-05, + "loss": 0.1132, + "num_input_tokens_seen": 167396892, + "step": 1726 + }, + { + "epoch": 0.16875244427062963, + "loss": 0.12275565415620804, + "loss_ce": 0.008986126631498337, + "loss_iou": 0.388671875, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 167396892, + "step": 1726 + }, + { + "epoch": 0.1688502150958154, + "grad_norm": 23.86141627194825, + "learning_rate": 5e-05, + "loss": 0.1369, + "num_input_tokens_seen": 167493880, + "step": 1727 + }, + { + "epoch": 0.1688502150958154, + "loss": 0.17122569680213928, + "loss_ce": 0.0060645658522844315, + "loss_iou": 0.470703125, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 167493880, + "step": 1727 + }, + { + "epoch": 0.1689479859210012, + "grad_norm": 12.803909617505989, + "learning_rate": 5e-05, + "loss": 0.1116, + "num_input_tokens_seen": 167591536, + "step": 1728 + }, + { + "epoch": 0.1689479859210012, + "loss": 0.13453873991966248, + "loss_ce": 0.0097523657605052, + "loss_iou": 0.37890625, + "loss_num": 0.02490234375, + "loss_xval": 0.125, + "num_input_tokens_seen": 167591536, + "step": 1728 + }, + { + "epoch": 0.16904575674618694, + "grad_norm": 11.35195219727063, + "learning_rate": 5e-05, + "loss": 0.1503, + "num_input_tokens_seen": 167688984, + "step": 1729 + }, + { + "epoch": 0.16904575674618694, + "loss": 0.12231766432523727, + "loss_ce": 0.002475142478942871, + "loss_iou": 0.373046875, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 167688984, + "step": 1729 + }, + { + "epoch": 0.16914352757137271, + "grad_norm": 8.566023726747172, + "learning_rate": 5e-05, + "loss": 0.0946, + "num_input_tokens_seen": 167785024, + "step": 1730 + }, + { + "epoch": 0.16914352757137271, + "loss": 0.07196322828531265, + "loss_ce": 0.006686130538582802, + "loss_iou": 0.29296875, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 167785024, + "step": 1730 + }, + { + "epoch": 0.16924129839655846, + "grad_norm": 10.5125412464912, + "learning_rate": 5e-05, + "loss": 0.148, + "num_input_tokens_seen": 167882296, + "step": 1731 + }, + { + "epoch": 0.16924129839655846, + "loss": 0.17851819097995758, + "loss_ce": 0.005117310676723719, + "loss_iou": 0.44921875, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 167882296, + "step": 1731 + }, + { + "epoch": 0.16933906922174424, + "grad_norm": 12.1830289963555, + "learning_rate": 5e-05, + "loss": 0.1495, + "num_input_tokens_seen": 167980004, + "step": 1732 + }, + { + "epoch": 0.16933906922174424, + "loss": 0.15253669023513794, + "loss_ce": 0.008249586448073387, + "loss_iou": 0.34765625, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 167980004, + "step": 1732 + }, + { + "epoch": 0.16943684004693, + "grad_norm": 7.489601053468146, + "learning_rate": 5e-05, + "loss": 0.1335, + "num_input_tokens_seen": 168076788, + "step": 1733 + }, + { + "epoch": 0.16943684004693, + "loss": 0.1257418394088745, + "loss_ce": 0.005594149697571993, + "loss_iou": 0.259765625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 168076788, + "step": 1733 + }, + { + "epoch": 0.16953461087211577, + "grad_norm": 2.54215501582339, + "learning_rate": 5e-05, + "loss": 0.0984, + "num_input_tokens_seen": 168173268, + "step": 1734 + }, + { + "epoch": 0.16953461087211577, + "loss": 0.12080740183591843, + "loss_ce": 0.008167019113898277, + "loss_iou": 0.275390625, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 168173268, + "step": 1734 + }, + { + "epoch": 0.16963238169730152, + "grad_norm": 1.9620945436721384, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 168269048, + "step": 1735 + }, + { + "epoch": 0.16963238169730152, + "loss": 0.07670199871063232, + "loss_ce": 0.007533898577094078, + "loss_iou": 0.27734375, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 168269048, + "step": 1735 + }, + { + "epoch": 0.1697301525224873, + "grad_norm": 19.633948739577495, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 168366412, + "step": 1736 + }, + { + "epoch": 0.1697301525224873, + "loss": 0.08129991590976715, + "loss_ce": 0.005593426525592804, + "loss_iou": 0.337890625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 168366412, + "step": 1736 + }, + { + "epoch": 0.16982792334767305, + "grad_norm": 48.50913893646234, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 168463984, + "step": 1737 + }, + { + "epoch": 0.16982792334767305, + "loss": 0.0860808938741684, + "loss_ce": 0.00290523748844862, + "loss_iou": 0.3515625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 168463984, + "step": 1737 + }, + { + "epoch": 0.16992569417285883, + "grad_norm": 46.618097181082156, + "learning_rate": 5e-05, + "loss": 0.1178, + "num_input_tokens_seen": 168560588, + "step": 1738 + }, + { + "epoch": 0.16992569417285883, + "loss": 0.15187221765518188, + "loss_ce": 0.006928975693881512, + "loss_iou": 0.283203125, + "loss_num": 0.029052734375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 168560588, + "step": 1738 + }, + { + "epoch": 0.17002346499804458, + "grad_norm": 5.757565602907301, + "learning_rate": 5e-05, + "loss": 0.1044, + "num_input_tokens_seen": 168657068, + "step": 1739 + }, + { + "epoch": 0.17002346499804458, + "loss": 0.07006825506687164, + "loss_ce": 0.003875626716762781, + "loss_iou": 0.376953125, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 168657068, + "step": 1739 + }, + { + "epoch": 0.17012123582323035, + "grad_norm": 22.942514075955504, + "learning_rate": 5e-05, + "loss": 0.0963, + "num_input_tokens_seen": 168753916, + "step": 1740 + }, + { + "epoch": 0.17012123582323035, + "loss": 0.10458715260028839, + "loss_ce": 0.004794669337570667, + "loss_iou": 0.47265625, + "loss_num": 0.02001953125, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 168753916, + "step": 1740 + }, + { + "epoch": 0.1702190066484161, + "grad_norm": 10.1610543322017, + "learning_rate": 5e-05, + "loss": 0.1219, + "num_input_tokens_seen": 168851196, + "step": 1741 + }, + { + "epoch": 0.1702190066484161, + "loss": 0.12932787835597992, + "loss_ce": 0.005457031540572643, + "loss_iou": 0.375, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 168851196, + "step": 1741 + }, + { + "epoch": 0.17031677747360188, + "grad_norm": 9.810295266546836, + "learning_rate": 5e-05, + "loss": 0.0982, + "num_input_tokens_seen": 168947788, + "step": 1742 + }, + { + "epoch": 0.17031677747360188, + "loss": 0.07229545712471008, + "loss_ce": 0.003600388765335083, + "loss_iou": 0.447265625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 168947788, + "step": 1742 + }, + { + "epoch": 0.17041454829878763, + "grad_norm": 67.0117042054245, + "learning_rate": 5e-05, + "loss": 0.1052, + "num_input_tokens_seen": 169045040, + "step": 1743 + }, + { + "epoch": 0.17041454829878763, + "loss": 0.09537501633167267, + "loss_ce": 0.003242448903620243, + "loss_iou": 0.390625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 169045040, + "step": 1743 + }, + { + "epoch": 0.1705123191239734, + "grad_norm": 15.75139624635953, + "learning_rate": 5e-05, + "loss": 0.1031, + "num_input_tokens_seen": 169141100, + "step": 1744 + }, + { + "epoch": 0.1705123191239734, + "loss": 0.13454042375087738, + "loss_ce": 0.006061412859708071, + "loss_iou": 0.3203125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 169141100, + "step": 1744 + }, + { + "epoch": 0.17061008994915916, + "grad_norm": 6.079033301658242, + "learning_rate": 5e-05, + "loss": 0.105, + "num_input_tokens_seen": 169238276, + "step": 1745 + }, + { + "epoch": 0.17061008994915916, + "loss": 0.14213615655899048, + "loss_ce": 0.00859123095870018, + "loss_iou": 0.474609375, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 169238276, + "step": 1745 + }, + { + "epoch": 0.17070786077434494, + "grad_norm": 4.483279212772326, + "learning_rate": 5e-05, + "loss": 0.0968, + "num_input_tokens_seen": 169335252, + "step": 1746 + }, + { + "epoch": 0.17070786077434494, + "loss": 0.09764916449785233, + "loss_ce": 0.00707298843190074, + "loss_iou": 0.36328125, + "loss_num": 0.01806640625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 169335252, + "step": 1746 + }, + { + "epoch": 0.1708056315995307, + "grad_norm": 5.514608043795863, + "learning_rate": 5e-05, + "loss": 0.0896, + "num_input_tokens_seen": 169432804, + "step": 1747 + }, + { + "epoch": 0.1708056315995307, + "loss": 0.08122092485427856, + "loss_ce": 0.005720432847738266, + "loss_iou": 0.42578125, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 169432804, + "step": 1747 + }, + { + "epoch": 0.17090340242471647, + "grad_norm": 7.358975012212102, + "learning_rate": 5e-05, + "loss": 0.1142, + "num_input_tokens_seen": 169529756, + "step": 1748 + }, + { + "epoch": 0.17090340242471647, + "loss": 0.08547437191009521, + "loss_ce": 0.007273077964782715, + "loss_iou": 0.390625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 169529756, + "step": 1748 + }, + { + "epoch": 0.17100117324990222, + "grad_norm": 12.62604865418977, + "learning_rate": 5e-05, + "loss": 0.0973, + "num_input_tokens_seen": 169625616, + "step": 1749 + }, + { + "epoch": 0.17100117324990222, + "loss": 0.0840761810541153, + "loss_ce": 0.0031740874983370304, + "loss_iou": 0.27734375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 169625616, + "step": 1749 + }, + { + "epoch": 0.171098944075088, + "grad_norm": 4.099529213166408, + "learning_rate": 5e-05, + "loss": 0.1074, + "num_input_tokens_seen": 169722820, + "step": 1750 + }, + { + "epoch": 0.171098944075088, + "eval_seeclick_CIoU": 0.32547053694725037, + "eval_seeclick_GIoU": 0.31676962971687317, + "eval_seeclick_IoU": 0.4054033160209656, + "eval_seeclick_MAE_all": 0.11165900155901909, + "eval_seeclick_MAE_h": 0.05342032387852669, + "eval_seeclick_MAE_w": 0.1610238179564476, + "eval_seeclick_MAE_x": 0.17816119641065598, + "eval_seeclick_MAE_y": 0.054030681028962135, + "eval_seeclick_NUM_probability": 0.9999870359897614, + "eval_seeclick_inside_bbox": 0.6463068127632141, + "eval_seeclick_loss": 0.36308449506759644, + "eval_seeclick_loss_ce": 0.00955036748200655, + "eval_seeclick_loss_iou": 0.41448974609375, + "eval_seeclick_loss_num": 0.06658172607421875, + "eval_seeclick_loss_xval": 0.332763671875, + "eval_seeclick_runtime": 74.0998, + "eval_seeclick_samples_per_second": 0.58, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 169722820, + "step": 1750 + }, + { + "epoch": 0.171098944075088, + "eval_icons_CIoU": 0.5809520483016968, + "eval_icons_GIoU": 0.58564093708992, + "eval_icons_IoU": 0.61971315741539, + "eval_icons_MAE_all": 0.07277822121977806, + "eval_icons_MAE_h": 0.09050784632563591, + "eval_icons_MAE_w": 0.057128435000777245, + "eval_icons_MAE_x": 0.054911695420742035, + "eval_icons_MAE_y": 0.08856489509344101, + "eval_icons_NUM_probability": 0.999861091375351, + "eval_icons_inside_bbox": 0.7916666567325592, + "eval_icons_loss": 0.23974891006946564, + "eval_icons_loss_ce": 0.00010013611790782306, + "eval_icons_loss_iou": 0.42022705078125, + "eval_icons_loss_num": 0.050975799560546875, + "eval_icons_loss_xval": 0.254974365234375, + "eval_icons_runtime": 94.7699, + "eval_icons_samples_per_second": 0.528, + "eval_icons_steps_per_second": 0.021, + "num_input_tokens_seen": 169722820, + "step": 1750 + }, + { + "epoch": 0.171098944075088, + "eval_screenspot_CIoU": 0.24126344670852026, + "eval_screenspot_GIoU": 0.2276924947897593, + "eval_screenspot_IoU": 0.35245073835055035, + "eval_screenspot_MAE_all": 0.184987540046374, + "eval_screenspot_MAE_h": 0.13951651006937027, + "eval_screenspot_MAE_w": 0.25950536131858826, + "eval_screenspot_MAE_x": 0.20999121169249216, + "eval_screenspot_MAE_y": 0.13093706965446472, + "eval_screenspot_NUM_probability": 0.9999627868334452, + "eval_screenspot_inside_bbox": 0.6108333269755045, + "eval_screenspot_loss": 0.6356346011161804, + "eval_screenspot_loss_ce": 0.02103206453224023, + "eval_screenspot_loss_iou": 0.3531901041666667, + "eval_screenspot_loss_num": 0.12375895182291667, + "eval_screenspot_loss_xval": 0.6187744140625, + "eval_screenspot_runtime": 156.2389, + "eval_screenspot_samples_per_second": 0.57, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 169722820, + "step": 1750 + }, + { + "epoch": 0.171098944075088, + "eval_compot_CIoU": 0.431398943066597, + "eval_compot_GIoU": 0.4113926440477371, + "eval_compot_IoU": 0.5035108029842377, + "eval_compot_MAE_all": 0.10276490077376366, + "eval_compot_MAE_h": 0.07231802493333817, + "eval_compot_MAE_w": 0.13212398439645767, + "eval_compot_MAE_x": 0.1301681511104107, + "eval_compot_MAE_y": 0.07644941098988056, + "eval_compot_NUM_probability": 0.9999586045742035, + "eval_compot_inside_bbox": 0.6336805522441864, + "eval_compot_loss": 0.31810420751571655, + "eval_compot_loss_ce": 0.014162306673824787, + "eval_compot_loss_iou": 0.47900390625, + "eval_compot_loss_num": 0.055408477783203125, + "eval_compot_loss_xval": 0.27703857421875, + "eval_compot_runtime": 93.7935, + "eval_compot_samples_per_second": 0.533, + "eval_compot_steps_per_second": 0.021, + "num_input_tokens_seen": 169722820, + "step": 1750 + }, + { + "epoch": 0.171098944075088, + "loss": 0.27959829568862915, + "loss_ce": 0.014156382530927658, + "loss_iou": 0.51171875, + "loss_num": 0.05322265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 169722820, + "step": 1750 + }, + { + "epoch": 0.17119671490027377, + "grad_norm": 20.069460480302887, + "learning_rate": 5e-05, + "loss": 0.1384, + "num_input_tokens_seen": 169819728, + "step": 1751 + }, + { + "epoch": 0.17119671490027377, + "loss": 0.18314501643180847, + "loss_ce": 0.013116342946887016, + "loss_iou": 0.353515625, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 169819728, + "step": 1751 + }, + { + "epoch": 0.17129448572545952, + "grad_norm": 4.745148220904267, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 169917288, + "step": 1752 + }, + { + "epoch": 0.17129448572545952, + "loss": 0.08278094232082367, + "loss_ce": 0.003481019288301468, + "loss_iou": 0.3984375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 169917288, + "step": 1752 + }, + { + "epoch": 0.1713922565506453, + "grad_norm": 3.4687647938960593, + "learning_rate": 5e-05, + "loss": 0.1147, + "num_input_tokens_seen": 170014744, + "step": 1753 + }, + { + "epoch": 0.1713922565506453, + "loss": 0.10461016744375229, + "loss_ce": 0.008495055139064789, + "loss_iou": 0.44140625, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 170014744, + "step": 1753 + }, + { + "epoch": 0.17149002737583105, + "grad_norm": 18.894849395092407, + "learning_rate": 5e-05, + "loss": 0.1286, + "num_input_tokens_seen": 170113492, + "step": 1754 + }, + { + "epoch": 0.17149002737583105, + "loss": 0.16015349328517914, + "loss_ce": 0.008603200316429138, + "loss_iou": 0.37890625, + "loss_num": 0.0303955078125, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 170113492, + "step": 1754 + }, + { + "epoch": 0.17158779820101683, + "grad_norm": 11.688309683896644, + "learning_rate": 5e-05, + "loss": 0.1013, + "num_input_tokens_seen": 170210640, + "step": 1755 + }, + { + "epoch": 0.17158779820101683, + "loss": 0.1288287490606308, + "loss_ce": 0.005385142285376787, + "loss_iou": 0.421875, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 170210640, + "step": 1755 + }, + { + "epoch": 0.17168556902620258, + "grad_norm": 3.0391713207848023, + "learning_rate": 5e-05, + "loss": 0.1202, + "num_input_tokens_seen": 170307616, + "step": 1756 + }, + { + "epoch": 0.17168556902620258, + "loss": 0.0981626808643341, + "loss_ce": 0.007475888356566429, + "loss_iou": 0.369140625, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 170307616, + "step": 1756 + }, + { + "epoch": 0.17178333985138836, + "grad_norm": 6.5523397080573025, + "learning_rate": 5e-05, + "loss": 0.1103, + "num_input_tokens_seen": 170404812, + "step": 1757 + }, + { + "epoch": 0.17178333985138836, + "loss": 0.12287208437919617, + "loss_ce": 0.007042611017823219, + "loss_iou": 0.349609375, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 170404812, + "step": 1757 + }, + { + "epoch": 0.1718811106765741, + "grad_norm": 9.8769165085956, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 170501876, + "step": 1758 + }, + { + "epoch": 0.1718811106765741, + "loss": 0.09520746767520905, + "loss_ce": 0.008720655925571918, + "loss_iou": 0.328125, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 170501876, + "step": 1758 + }, + { + "epoch": 0.17197888150175988, + "grad_norm": 21.538015309812845, + "learning_rate": 5e-05, + "loss": 0.1196, + "num_input_tokens_seen": 170598560, + "step": 1759 + }, + { + "epoch": 0.17197888150175988, + "loss": 0.13207083940505981, + "loss_ce": 0.009176554158329964, + "loss_iou": 0.37890625, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 170598560, + "step": 1759 + }, + { + "epoch": 0.17207665232694563, + "grad_norm": 31.796433057000847, + "learning_rate": 5e-05, + "loss": 0.1286, + "num_input_tokens_seen": 170694928, + "step": 1760 + }, + { + "epoch": 0.17207665232694563, + "loss": 0.14215274155139923, + "loss_ce": 0.005159336142241955, + "loss_iou": 0.353515625, + "loss_num": 0.0274658203125, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 170694928, + "step": 1760 + }, + { + "epoch": 0.1721744231521314, + "grad_norm": 14.700537060197366, + "learning_rate": 5e-05, + "loss": 0.1189, + "num_input_tokens_seen": 170792524, + "step": 1761 + }, + { + "epoch": 0.1721744231521314, + "loss": 0.10764391720294952, + "loss_ce": 0.009438352659344673, + "loss_iou": 0.412109375, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 170792524, + "step": 1761 + }, + { + "epoch": 0.17227219397731716, + "grad_norm": 4.270314949130479, + "learning_rate": 5e-05, + "loss": 0.1549, + "num_input_tokens_seen": 170889216, + "step": 1762 + }, + { + "epoch": 0.17227219397731716, + "loss": 0.157547265291214, + "loss_ce": 0.0037997025065124035, + "loss_iou": 0.412109375, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 170889216, + "step": 1762 + }, + { + "epoch": 0.17236996480250294, + "grad_norm": 1.3834695446138083, + "learning_rate": 5e-05, + "loss": 0.1122, + "num_input_tokens_seen": 170986264, + "step": 1763 + }, + { + "epoch": 0.17236996480250294, + "loss": 0.10953309386968613, + "loss_ce": 0.008630530908703804, + "loss_iou": 0.287109375, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 170986264, + "step": 1763 + }, + { + "epoch": 0.1724677356276887, + "grad_norm": 6.430037335580081, + "learning_rate": 5e-05, + "loss": 0.1206, + "num_input_tokens_seen": 171083596, + "step": 1764 + }, + { + "epoch": 0.1724677356276887, + "loss": 0.11717360466718674, + "loss_ce": 0.011643823236227036, + "loss_iou": 0.32421875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 171083596, + "step": 1764 + }, + { + "epoch": 0.17256550645287447, + "grad_norm": 1.548987945629295, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 171180304, + "step": 1765 + }, + { + "epoch": 0.17256550645287447, + "loss": 0.06704387068748474, + "loss_ce": 0.007916067726910114, + "loss_iou": 0.1962890625, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 171180304, + "step": 1765 + }, + { + "epoch": 0.17266327727806022, + "grad_norm": 2.0923932654688504, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 171278224, + "step": 1766 + }, + { + "epoch": 0.17266327727806022, + "loss": 0.05684623122215271, + "loss_ce": 0.003913492895662785, + "loss_iou": 0.32421875, + "loss_num": 0.01055908203125, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 171278224, + "step": 1766 + }, + { + "epoch": 0.172761048103246, + "grad_norm": 14.789652847842193, + "learning_rate": 5e-05, + "loss": 0.1065, + "num_input_tokens_seen": 171375044, + "step": 1767 + }, + { + "epoch": 0.172761048103246, + "loss": 0.12768810987472534, + "loss_ce": 0.006457027047872543, + "loss_iou": 0.2734375, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 171375044, + "step": 1767 + }, + { + "epoch": 0.17285881892843175, + "grad_norm": 18.655112632116992, + "learning_rate": 5e-05, + "loss": 0.0916, + "num_input_tokens_seen": 171471948, + "step": 1768 + }, + { + "epoch": 0.17285881892843175, + "loss": 0.0863504409790039, + "loss_ce": 0.008484834805130959, + "loss_iou": 0.33203125, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 171471948, + "step": 1768 + }, + { + "epoch": 0.17295658975361752, + "grad_norm": 19.068565508903628, + "learning_rate": 5e-05, + "loss": 0.1235, + "num_input_tokens_seen": 171568928, + "step": 1769 + }, + { + "epoch": 0.17295658975361752, + "loss": 0.11154818534851074, + "loss_ce": 0.006842382252216339, + "loss_iou": 0.322265625, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 171568928, + "step": 1769 + }, + { + "epoch": 0.17305436057880327, + "grad_norm": 23.12491195851193, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 171666788, + "step": 1770 + }, + { + "epoch": 0.17305436057880327, + "loss": 0.10253145545721054, + "loss_ce": 0.005577115807682276, + "loss_iou": 0.46875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 171666788, + "step": 1770 + }, + { + "epoch": 0.17315213140398905, + "grad_norm": 11.273144035301735, + "learning_rate": 5e-05, + "loss": 0.1154, + "num_input_tokens_seen": 171763604, + "step": 1771 + }, + { + "epoch": 0.17315213140398905, + "loss": 0.11651171743869781, + "loss_ce": 0.004344357177615166, + "loss_iou": 0.388671875, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 171763604, + "step": 1771 + }, + { + "epoch": 0.1732499022291748, + "grad_norm": 284.14402137369166, + "learning_rate": 5e-05, + "loss": 0.1191, + "num_input_tokens_seen": 171860744, + "step": 1772 + }, + { + "epoch": 0.1732499022291748, + "loss": 0.13198095560073853, + "loss_ce": 0.006797846872359514, + "loss_iou": 0.3515625, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 171860744, + "step": 1772 + }, + { + "epoch": 0.17334767305436058, + "grad_norm": 6.426096651143094, + "learning_rate": 5e-05, + "loss": 0.1143, + "num_input_tokens_seen": 171957476, + "step": 1773 + }, + { + "epoch": 0.17334767305436058, + "loss": 0.0890352800488472, + "loss_ce": 0.0031817066483199596, + "loss_iou": 0.396484375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 171957476, + "step": 1773 + }, + { + "epoch": 0.17344544387954636, + "grad_norm": 9.669861157042282, + "learning_rate": 5e-05, + "loss": 0.1822, + "num_input_tokens_seen": 172054972, + "step": 1774 + }, + { + "epoch": 0.17344544387954636, + "loss": 0.19394007325172424, + "loss_ce": 0.004944714717566967, + "loss_iou": 0.30078125, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 172054972, + "step": 1774 + }, + { + "epoch": 0.1735432147047321, + "grad_norm": 1.9007302580436323, + "learning_rate": 5e-05, + "loss": 0.124, + "num_input_tokens_seen": 172151260, + "step": 1775 + }, + { + "epoch": 0.1735432147047321, + "loss": 0.07814139127731323, + "loss_ce": 0.004838170483708382, + "loss_iou": 0.33984375, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 172151260, + "step": 1775 + }, + { + "epoch": 0.17364098552991789, + "grad_norm": 10.047289702897103, + "learning_rate": 5e-05, + "loss": 0.1312, + "num_input_tokens_seen": 172248456, + "step": 1776 + }, + { + "epoch": 0.17364098552991789, + "loss": 0.11661923676729202, + "loss_ce": 0.0075341579504311085, + "loss_iou": 0.2470703125, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 172248456, + "step": 1776 + }, + { + "epoch": 0.17373875635510364, + "grad_norm": 13.656982579172796, + "learning_rate": 5e-05, + "loss": 0.1548, + "num_input_tokens_seen": 172344188, + "step": 1777 + }, + { + "epoch": 0.17373875635510364, + "loss": 0.14049941301345825, + "loss_ce": 0.010433504357933998, + "loss_iou": 0.32421875, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 172344188, + "step": 1777 + }, + { + "epoch": 0.1738365271802894, + "grad_norm": 6.293299345405203, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 172441240, + "step": 1778 + }, + { + "epoch": 0.1738365271802894, + "loss": 0.08253410458564758, + "loss_ce": 0.0056755878031253815, + "loss_iou": 0.3671875, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 172441240, + "step": 1778 + }, + { + "epoch": 0.17393429800547516, + "grad_norm": 7.850027725999382, + "learning_rate": 5e-05, + "loss": 0.1014, + "num_input_tokens_seen": 172538348, + "step": 1779 + }, + { + "epoch": 0.17393429800547516, + "loss": 0.12049102783203125, + "loss_ce": 0.0062179528176784515, + "loss_iou": 0.287109375, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 172538348, + "step": 1779 + }, + { + "epoch": 0.17403206883066094, + "grad_norm": 8.022138851061237, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 172635328, + "step": 1780 + }, + { + "epoch": 0.17403206883066094, + "loss": 0.07287028431892395, + "loss_ce": 0.00335123622789979, + "loss_iou": 0.328125, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 172635328, + "step": 1780 + }, + { + "epoch": 0.1741298396558467, + "grad_norm": 13.289945297063005, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 172732264, + "step": 1781 + }, + { + "epoch": 0.1741298396558467, + "loss": 0.10767631232738495, + "loss_ce": 0.005379483103752136, + "loss_iou": 0.318359375, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 172732264, + "step": 1781 + }, + { + "epoch": 0.17422761048103247, + "grad_norm": 25.263914736003024, + "learning_rate": 5e-05, + "loss": 0.1062, + "num_input_tokens_seen": 172829108, + "step": 1782 + }, + { + "epoch": 0.17422761048103247, + "loss": 0.08409231901168823, + "loss_ce": 0.005829991772770882, + "loss_iou": 0.357421875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 172829108, + "step": 1782 + }, + { + "epoch": 0.17432538130621822, + "grad_norm": 4.566742338718881, + "learning_rate": 5e-05, + "loss": 0.1714, + "num_input_tokens_seen": 172925228, + "step": 1783 + }, + { + "epoch": 0.17432538130621822, + "loss": 0.12841206789016724, + "loss_ce": 0.002862761029973626, + "loss_iou": 0.40234375, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 172925228, + "step": 1783 + }, + { + "epoch": 0.174423152131404, + "grad_norm": 4.64699164262592, + "learning_rate": 5e-05, + "loss": 0.1276, + "num_input_tokens_seen": 173022252, + "step": 1784 + }, + { + "epoch": 0.174423152131404, + "loss": 0.1464272141456604, + "loss_ce": 0.013584190979599953, + "loss_iou": 0.36328125, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 173022252, + "step": 1784 + }, + { + "epoch": 0.17452092295658975, + "grad_norm": 21.600008830096126, + "learning_rate": 5e-05, + "loss": 0.116, + "num_input_tokens_seen": 173118948, + "step": 1785 + }, + { + "epoch": 0.17452092295658975, + "loss": 0.1384831964969635, + "loss_ce": 0.006403122562915087, + "loss_iou": 0.294921875, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 173118948, + "step": 1785 + }, + { + "epoch": 0.17461869378177552, + "grad_norm": 9.920647834292462, + "learning_rate": 5e-05, + "loss": 0.1114, + "num_input_tokens_seen": 173215804, + "step": 1786 + }, + { + "epoch": 0.17461869378177552, + "loss": 0.1341564655303955, + "loss_ce": 0.005158649757504463, + "loss_iou": 0.291015625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 173215804, + "step": 1786 + }, + { + "epoch": 0.17471646460696127, + "grad_norm": 33.394969898898374, + "learning_rate": 5e-05, + "loss": 0.1181, + "num_input_tokens_seen": 173313280, + "step": 1787 + }, + { + "epoch": 0.17471646460696127, + "loss": 0.12746869027614594, + "loss_ce": 0.005001647863537073, + "loss_iou": 0.380859375, + "loss_num": 0.0244140625, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 173313280, + "step": 1787 + }, + { + "epoch": 0.17481423543214705, + "grad_norm": 7.100527733365872, + "learning_rate": 5e-05, + "loss": 0.1448, + "num_input_tokens_seen": 173410384, + "step": 1788 + }, + { + "epoch": 0.17481423543214705, + "loss": 0.1492377668619156, + "loss_ce": 0.015143523924052715, + "loss_iou": 0.2138671875, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 173410384, + "step": 1788 + }, + { + "epoch": 0.1749120062573328, + "grad_norm": 3.4049663766539555, + "learning_rate": 5e-05, + "loss": 0.1412, + "num_input_tokens_seen": 173507536, + "step": 1789 + }, + { + "epoch": 0.1749120062573328, + "loss": 0.1395166516304016, + "loss_ce": 0.006246386561542749, + "loss_iou": 0.3515625, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 173507536, + "step": 1789 + }, + { + "epoch": 0.17500977708251858, + "grad_norm": 5.037724154533689, + "learning_rate": 5e-05, + "loss": 0.1114, + "num_input_tokens_seen": 173604660, + "step": 1790 + }, + { + "epoch": 0.17500977708251858, + "loss": 0.11613182723522186, + "loss_ce": 0.003552478039637208, + "loss_iou": 0.330078125, + "loss_num": 0.0224609375, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 173604660, + "step": 1790 + }, + { + "epoch": 0.17510754790770433, + "grad_norm": 5.041018254372803, + "learning_rate": 5e-05, + "loss": 0.1235, + "num_input_tokens_seen": 173701624, + "step": 1791 + }, + { + "epoch": 0.17510754790770433, + "loss": 0.14069250226020813, + "loss_ce": 0.004401009995490313, + "loss_iou": 0.47265625, + "loss_num": 0.0272216796875, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 173701624, + "step": 1791 + }, + { + "epoch": 0.1752053187328901, + "grad_norm": 52.68049734263129, + "learning_rate": 5e-05, + "loss": 0.1051, + "num_input_tokens_seen": 173798712, + "step": 1792 + }, + { + "epoch": 0.1752053187328901, + "loss": 0.11172651499509811, + "loss_ce": 0.00283979345113039, + "loss_iou": 0.423828125, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 173798712, + "step": 1792 + }, + { + "epoch": 0.17530308955807586, + "grad_norm": 23.624741653227982, + "learning_rate": 5e-05, + "loss": 0.16, + "num_input_tokens_seen": 173896672, + "step": 1793 + }, + { + "epoch": 0.17530308955807586, + "loss": 0.1588350385427475, + "loss_ce": 0.0037447107024490833, + "loss_iou": 0.4140625, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 173896672, + "step": 1793 + }, + { + "epoch": 0.17540086038326164, + "grad_norm": 6.01824162102047, + "learning_rate": 5e-05, + "loss": 0.1251, + "num_input_tokens_seen": 173994448, + "step": 1794 + }, + { + "epoch": 0.17540086038326164, + "loss": 0.13182896375656128, + "loss_ce": 0.005608266219496727, + "loss_iou": 0.431640625, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 173994448, + "step": 1794 + }, + { + "epoch": 0.1754986312084474, + "grad_norm": 10.40919878290256, + "learning_rate": 5e-05, + "loss": 0.1438, + "num_input_tokens_seen": 174091632, + "step": 1795 + }, + { + "epoch": 0.1754986312084474, + "loss": 0.16095907986164093, + "loss_ce": 0.008249121718108654, + "loss_iou": 0.40625, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 174091632, + "step": 1795 + }, + { + "epoch": 0.17559640203363316, + "grad_norm": 12.849953559755098, + "learning_rate": 5e-05, + "loss": 0.1144, + "num_input_tokens_seen": 174188008, + "step": 1796 + }, + { + "epoch": 0.17559640203363316, + "loss": 0.08541396260261536, + "loss_ce": 0.006526017561554909, + "loss_iou": 0.396484375, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 174188008, + "step": 1796 + }, + { + "epoch": 0.17569417285881894, + "grad_norm": 11.751204516417674, + "learning_rate": 5e-05, + "loss": 0.1309, + "num_input_tokens_seen": 174284812, + "step": 1797 + }, + { + "epoch": 0.17569417285881894, + "loss": 0.12458867579698563, + "loss_ce": 0.005783744156360626, + "loss_iou": 0.392578125, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 174284812, + "step": 1797 + }, + { + "epoch": 0.1757919436840047, + "grad_norm": 20.67652495715979, + "learning_rate": 5e-05, + "loss": 0.1191, + "num_input_tokens_seen": 174382688, + "step": 1798 + }, + { + "epoch": 0.1757919436840047, + "loss": 0.15317504107952118, + "loss_ce": 0.004981680773198605, + "loss_iou": 0.41796875, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 174382688, + "step": 1798 + }, + { + "epoch": 0.17588971450919047, + "grad_norm": 24.473932015379074, + "learning_rate": 5e-05, + "loss": 0.1559, + "num_input_tokens_seen": 174479496, + "step": 1799 + }, + { + "epoch": 0.17588971450919047, + "loss": 0.13842400908470154, + "loss_ce": 0.004390806891024113, + "loss_iou": 0.4140625, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 174479496, + "step": 1799 + }, + { + "epoch": 0.17598748533437622, + "grad_norm": 13.442028981526898, + "learning_rate": 5e-05, + "loss": 0.0954, + "num_input_tokens_seen": 174576108, + "step": 1800 + }, + { + "epoch": 0.17598748533437622, + "loss": 0.11557255685329437, + "loss_ce": 0.003252605441957712, + "loss_iou": 0.349609375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 174576108, + "step": 1800 + }, + { + "epoch": 0.176085256159562, + "grad_norm": 6.9166483352129795, + "learning_rate": 5e-05, + "loss": 0.1364, + "num_input_tokens_seen": 174672852, + "step": 1801 + }, + { + "epoch": 0.176085256159562, + "loss": 0.13452033698558807, + "loss_ce": 0.0114734573289752, + "loss_iou": 0.2890625, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 174672852, + "step": 1801 + }, + { + "epoch": 0.17618302698474775, + "grad_norm": 13.165210434517167, + "learning_rate": 5e-05, + "loss": 0.0983, + "num_input_tokens_seen": 174769564, + "step": 1802 + }, + { + "epoch": 0.17618302698474775, + "loss": 0.07956399023532867, + "loss_ce": 0.007847676984965801, + "loss_iou": 0.337890625, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 174769564, + "step": 1802 + }, + { + "epoch": 0.17628079780993353, + "grad_norm": 22.73528102224294, + "learning_rate": 5e-05, + "loss": 0.1159, + "num_input_tokens_seen": 174865988, + "step": 1803 + }, + { + "epoch": 0.17628079780993353, + "loss": 0.1064772978425026, + "loss_ce": 0.005105532240122557, + "loss_iou": 0.1328125, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 174865988, + "step": 1803 + }, + { + "epoch": 0.17637856863511928, + "grad_norm": 4.671162393672091, + "learning_rate": 5e-05, + "loss": 0.1009, + "num_input_tokens_seen": 174962660, + "step": 1804 + }, + { + "epoch": 0.17637856863511928, + "loss": 0.06416119635105133, + "loss_ce": 0.005124944262206554, + "loss_iou": 0.201171875, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 174962660, + "step": 1804 + }, + { + "epoch": 0.17647633946030505, + "grad_norm": 4.20225356507911, + "learning_rate": 5e-05, + "loss": 0.1284, + "num_input_tokens_seen": 175059120, + "step": 1805 + }, + { + "epoch": 0.17647633946030505, + "loss": 0.13809770345687866, + "loss_ce": 0.005681926384568214, + "loss_iou": 0.279296875, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 175059120, + "step": 1805 + }, + { + "epoch": 0.1765741102854908, + "grad_norm": 25.29885440018052, + "learning_rate": 5e-05, + "loss": 0.133, + "num_input_tokens_seen": 175156728, + "step": 1806 + }, + { + "epoch": 0.1765741102854908, + "loss": 0.09567621350288391, + "loss_ce": 0.0054967645555734634, + "loss_iou": 0.279296875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 175156728, + "step": 1806 + }, + { + "epoch": 0.17667188111067658, + "grad_norm": 35.56825252444799, + "learning_rate": 5e-05, + "loss": 0.1028, + "num_input_tokens_seen": 175252868, + "step": 1807 + }, + { + "epoch": 0.17667188111067658, + "loss": 0.1107751801609993, + "loss_ce": 0.00754947355017066, + "loss_iou": 0.328125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 175252868, + "step": 1807 + }, + { + "epoch": 0.17676965193586233, + "grad_norm": 24.15295398768862, + "learning_rate": 5e-05, + "loss": 0.1397, + "num_input_tokens_seen": 175351540, + "step": 1808 + }, + { + "epoch": 0.17676965193586233, + "loss": 0.17031733691692352, + "loss_ce": 0.010405219160020351, + "loss_iou": 0.3828125, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 175351540, + "step": 1808 + }, + { + "epoch": 0.1768674227610481, + "grad_norm": 20.147477433767136, + "learning_rate": 5e-05, + "loss": 0.1231, + "num_input_tokens_seen": 175448160, + "step": 1809 + }, + { + "epoch": 0.1768674227610481, + "loss": 0.12691764533519745, + "loss_ce": 0.006678384263068438, + "loss_iou": 0.4140625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 175448160, + "step": 1809 + }, + { + "epoch": 0.17696519358623386, + "grad_norm": 14.990335655118427, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 175544892, + "step": 1810 + }, + { + "epoch": 0.17696519358623386, + "loss": 0.07339689135551453, + "loss_ce": 0.0046255276538431644, + "loss_iou": 0.359375, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 175544892, + "step": 1810 + }, + { + "epoch": 0.17706296441141964, + "grad_norm": 15.452953534605903, + "learning_rate": 5e-05, + "loss": 0.1184, + "num_input_tokens_seen": 175642912, + "step": 1811 + }, + { + "epoch": 0.17706296441141964, + "loss": 0.09374170750379562, + "loss_ce": 0.0026162187568843365, + "loss_iou": 0.439453125, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 175642912, + "step": 1811 + }, + { + "epoch": 0.1771607352366054, + "grad_norm": 19.0133321496689, + "learning_rate": 5e-05, + "loss": 0.1156, + "num_input_tokens_seen": 175740208, + "step": 1812 + }, + { + "epoch": 0.1771607352366054, + "loss": 0.10108634829521179, + "loss_ce": 0.003872599918395281, + "loss_iou": 0.341796875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 175740208, + "step": 1812 + }, + { + "epoch": 0.17725850606179117, + "grad_norm": 15.432332845047396, + "learning_rate": 5e-05, + "loss": 0.1231, + "num_input_tokens_seen": 175837156, + "step": 1813 + }, + { + "epoch": 0.17725850606179117, + "loss": 0.11452087759971619, + "loss_ce": 0.005023809615522623, + "loss_iou": 0.3515625, + "loss_num": 0.02197265625, + "loss_xval": 0.109375, + "num_input_tokens_seen": 175837156, + "step": 1813 + }, + { + "epoch": 0.17735627688697692, + "grad_norm": 13.700355322921, + "learning_rate": 5e-05, + "loss": 0.0769, + "num_input_tokens_seen": 175934388, + "step": 1814 + }, + { + "epoch": 0.17735627688697692, + "loss": 0.07596168667078018, + "loss_ce": 0.006656264886260033, + "loss_iou": 0.3984375, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 175934388, + "step": 1814 + }, + { + "epoch": 0.1774540477121627, + "grad_norm": 9.482170489076156, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 176031812, + "step": 1815 + }, + { + "epoch": 0.1774540477121627, + "loss": 0.08231639117002487, + "loss_ce": 0.0043134624138474464, + "loss_iou": 0.51171875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 176031812, + "step": 1815 + }, + { + "epoch": 0.17755181853734844, + "grad_norm": 7.268817253052228, + "learning_rate": 5e-05, + "loss": 0.1081, + "num_input_tokens_seen": 176129288, + "step": 1816 + }, + { + "epoch": 0.17755181853734844, + "loss": 0.09096719324588776, + "loss_ce": 0.00924112182110548, + "loss_iou": 0.353515625, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 176129288, + "step": 1816 + }, + { + "epoch": 0.17764958936253422, + "grad_norm": 7.164769937293558, + "learning_rate": 5e-05, + "loss": 0.1218, + "num_input_tokens_seen": 176226392, + "step": 1817 + }, + { + "epoch": 0.17764958936253422, + "loss": 0.11995048075914383, + "loss_ce": 0.011765668168663979, + "loss_iou": 0.349609375, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 176226392, + "step": 1817 + }, + { + "epoch": 0.17774736018771997, + "grad_norm": 4.222853979482157, + "learning_rate": 5e-05, + "loss": 0.1242, + "num_input_tokens_seen": 176323572, + "step": 1818 + }, + { + "epoch": 0.17774736018771997, + "loss": 0.1186966598033905, + "loss_ce": 0.010694950819015503, + "loss_iou": 0.298828125, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 176323572, + "step": 1818 + }, + { + "epoch": 0.17784513101290575, + "grad_norm": 5.20714964278407, + "learning_rate": 5e-05, + "loss": 0.1093, + "num_input_tokens_seen": 176421032, + "step": 1819 + }, + { + "epoch": 0.17784513101290575, + "loss": 0.08544585108757019, + "loss_ce": 0.0024838196113705635, + "loss_iou": 0.25390625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 176421032, + "step": 1819 + }, + { + "epoch": 0.17794290183809153, + "grad_norm": 17.839189754874944, + "learning_rate": 5e-05, + "loss": 0.1152, + "num_input_tokens_seen": 176518760, + "step": 1820 + }, + { + "epoch": 0.17794290183809153, + "loss": 0.1239302009344101, + "loss_ce": 0.007017351686954498, + "loss_iou": 0.2392578125, + "loss_num": 0.0234375, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 176518760, + "step": 1820 + }, + { + "epoch": 0.17804067266327728, + "grad_norm": 36.458908986001795, + "learning_rate": 5e-05, + "loss": 0.1178, + "num_input_tokens_seen": 176615916, + "step": 1821 + }, + { + "epoch": 0.17804067266327728, + "loss": 0.1308082640171051, + "loss_ce": 0.004221335984766483, + "loss_iou": 0.41015625, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 176615916, + "step": 1821 + }, + { + "epoch": 0.17813844348846306, + "grad_norm": 11.550692471172074, + "learning_rate": 5e-05, + "loss": 0.1258, + "num_input_tokens_seen": 176713120, + "step": 1822 + }, + { + "epoch": 0.17813844348846306, + "loss": 0.13962192833423615, + "loss_ce": 0.00879308208823204, + "loss_iou": 0.474609375, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 176713120, + "step": 1822 + }, + { + "epoch": 0.1782362143136488, + "grad_norm": 4.160587704480653, + "learning_rate": 5e-05, + "loss": 0.1019, + "num_input_tokens_seen": 176810548, + "step": 1823 + }, + { + "epoch": 0.1782362143136488, + "loss": 0.11956202983856201, + "loss_ce": 0.004403942264616489, + "loss_iou": 0.380859375, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 176810548, + "step": 1823 + }, + { + "epoch": 0.17833398513883458, + "grad_norm": 13.46720343064027, + "learning_rate": 5e-05, + "loss": 0.1201, + "num_input_tokens_seen": 176907948, + "step": 1824 + }, + { + "epoch": 0.17833398513883458, + "loss": 0.08679287135601044, + "loss_ce": 0.0029763393104076385, + "loss_iou": 0.443359375, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 176907948, + "step": 1824 + }, + { + "epoch": 0.17843175596402033, + "grad_norm": 8.641882392936276, + "learning_rate": 5e-05, + "loss": 0.1324, + "num_input_tokens_seen": 177004524, + "step": 1825 + }, + { + "epoch": 0.17843175596402033, + "loss": 0.12403300404548645, + "loss_ce": 0.006967575754970312, + "loss_iou": 0.447265625, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 177004524, + "step": 1825 + }, + { + "epoch": 0.1785295267892061, + "grad_norm": 11.880939115463546, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 177101308, + "step": 1826 + }, + { + "epoch": 0.1785295267892061, + "loss": 0.10741305351257324, + "loss_ce": 0.004606959875673056, + "loss_iou": 0.30078125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 177101308, + "step": 1826 + }, + { + "epoch": 0.17862729761439186, + "grad_norm": 9.92038503387762, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 177198012, + "step": 1827 + }, + { + "epoch": 0.17862729761439186, + "loss": 0.09427327662706375, + "loss_ce": 0.004063309170305729, + "loss_iou": 0.2060546875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 177198012, + "step": 1827 + }, + { + "epoch": 0.17872506843957764, + "grad_norm": 9.806868010355618, + "learning_rate": 5e-05, + "loss": 0.1246, + "num_input_tokens_seen": 177295892, + "step": 1828 + }, + { + "epoch": 0.17872506843957764, + "loss": 0.10358695685863495, + "loss_ce": 0.00501517578959465, + "loss_iou": 0.373046875, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 177295892, + "step": 1828 + }, + { + "epoch": 0.1788228392647634, + "grad_norm": 8.387834234002487, + "learning_rate": 5e-05, + "loss": 0.1073, + "num_input_tokens_seen": 177392112, + "step": 1829 + }, + { + "epoch": 0.1788228392647634, + "loss": 0.07139316201210022, + "loss_ce": 0.007962382398545742, + "loss_iou": 0.2158203125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 177392112, + "step": 1829 + }, + { + "epoch": 0.17892061008994917, + "grad_norm": 4.569878611476937, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 177488904, + "step": 1830 + }, + { + "epoch": 0.17892061008994917, + "loss": 0.07149966061115265, + "loss_ce": 0.008816555142402649, + "loss_iou": 0.3203125, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 177488904, + "step": 1830 + }, + { + "epoch": 0.17901838091513492, + "grad_norm": 15.362889084965209, + "learning_rate": 5e-05, + "loss": 0.1183, + "num_input_tokens_seen": 177586004, + "step": 1831 + }, + { + "epoch": 0.17901838091513492, + "loss": 0.13036984205245972, + "loss_ce": 0.0053088124841451645, + "loss_iou": 0.357421875, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 177586004, + "step": 1831 + }, + { + "epoch": 0.1791161517403207, + "grad_norm": 17.5636464513945, + "learning_rate": 5e-05, + "loss": 0.1116, + "num_input_tokens_seen": 177682596, + "step": 1832 + }, + { + "epoch": 0.1791161517403207, + "loss": 0.10257165879011154, + "loss_ce": 0.009035279043018818, + "loss_iou": 0.326171875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 177682596, + "step": 1832 + }, + { + "epoch": 0.17921392256550644, + "grad_norm": 9.524006945246274, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 177779392, + "step": 1833 + }, + { + "epoch": 0.17921392256550644, + "loss": 0.08051250874996185, + "loss_ce": 0.007285580970346928, + "loss_iou": 0.32421875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 177779392, + "step": 1833 + }, + { + "epoch": 0.17931169339069222, + "grad_norm": 13.383558369028696, + "learning_rate": 5e-05, + "loss": 0.1355, + "num_input_tokens_seen": 177876100, + "step": 1834 + }, + { + "epoch": 0.17931169339069222, + "loss": 0.14363162219524384, + "loss_ce": 0.005631141364574432, + "loss_iou": 0.353515625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 177876100, + "step": 1834 + }, + { + "epoch": 0.17940946421587797, + "grad_norm": 16.915489223342043, + "learning_rate": 5e-05, + "loss": 0.132, + "num_input_tokens_seen": 177973220, + "step": 1835 + }, + { + "epoch": 0.17940946421587797, + "loss": 0.13584589958190918, + "loss_ce": 0.0014770075213164091, + "loss_iou": 0.341796875, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 177973220, + "step": 1835 + }, + { + "epoch": 0.17950723504106375, + "grad_norm": 11.040848519767302, + "learning_rate": 5e-05, + "loss": 0.101, + "num_input_tokens_seen": 178069680, + "step": 1836 + }, + { + "epoch": 0.17950723504106375, + "loss": 0.07539255172014236, + "loss_ce": 0.006758518051356077, + "loss_iou": 0.4296875, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 178069680, + "step": 1836 + }, + { + "epoch": 0.1796050058662495, + "grad_norm": 9.076919182470494, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 178167500, + "step": 1837 + }, + { + "epoch": 0.1796050058662495, + "loss": 0.08692102879285812, + "loss_ce": 0.005179693456739187, + "loss_iou": 0.359375, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 178167500, + "step": 1837 + }, + { + "epoch": 0.17970277669143528, + "grad_norm": 14.447500601987807, + "learning_rate": 5e-05, + "loss": 0.0919, + "num_input_tokens_seen": 178265456, + "step": 1838 + }, + { + "epoch": 0.17970277669143528, + "loss": 0.08728670328855515, + "loss_ce": 0.00325655285269022, + "loss_iou": 0.41015625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 178265456, + "step": 1838 + }, + { + "epoch": 0.17980054751662103, + "grad_norm": 23.710181028709226, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 178362540, + "step": 1839 + }, + { + "epoch": 0.17980054751662103, + "loss": 0.06762003898620605, + "loss_ce": 0.005272631533443928, + "loss_iou": 0.35546875, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 178362540, + "step": 1839 + }, + { + "epoch": 0.1798983183418068, + "grad_norm": 12.273151017116419, + "learning_rate": 5e-05, + "loss": 0.0974, + "num_input_tokens_seen": 178458748, + "step": 1840 + }, + { + "epoch": 0.1798983183418068, + "loss": 0.1006765365600586, + "loss_ce": 0.007842069491744041, + "loss_iou": 0.205078125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 178458748, + "step": 1840 + }, + { + "epoch": 0.17999608916699256, + "grad_norm": 4.4639102153593315, + "learning_rate": 5e-05, + "loss": 0.1277, + "num_input_tokens_seen": 178556412, + "step": 1841 + }, + { + "epoch": 0.17999608916699256, + "loss": 0.1587231457233429, + "loss_ce": 0.010285651311278343, + "loss_iou": 0.40234375, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 178556412, + "step": 1841 + }, + { + "epoch": 0.18009385999217833, + "grad_norm": 4.38056688172814, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 178652284, + "step": 1842 + }, + { + "epoch": 0.18009385999217833, + "loss": 0.075002521276474, + "loss_ce": 0.003484576940536499, + "loss_iou": 0.30078125, + "loss_num": 0.01434326171875, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 178652284, + "step": 1842 + }, + { + "epoch": 0.1801916308173641, + "grad_norm": 24.97571871968182, + "learning_rate": 5e-05, + "loss": 0.0994, + "num_input_tokens_seen": 178749896, + "step": 1843 + }, + { + "epoch": 0.1801916308173641, + "loss": 0.09055673331022263, + "loss_ce": 0.008556008338928223, + "loss_iou": 0.28125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 178749896, + "step": 1843 + }, + { + "epoch": 0.18028940164254986, + "grad_norm": 29.812265653852375, + "learning_rate": 5e-05, + "loss": 0.1063, + "num_input_tokens_seen": 178847464, + "step": 1844 + }, + { + "epoch": 0.18028940164254986, + "loss": 0.11203087866306305, + "loss_ce": 0.004227524623274803, + "loss_iou": 0.35546875, + "loss_num": 0.021484375, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 178847464, + "step": 1844 + }, + { + "epoch": 0.18038717246773564, + "grad_norm": 13.098755156480223, + "learning_rate": 5e-05, + "loss": 0.0847, + "num_input_tokens_seen": 178944184, + "step": 1845 + }, + { + "epoch": 0.18038717246773564, + "loss": 0.0716906189918518, + "loss_ce": 0.00536829698830843, + "loss_iou": 0.3359375, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 178944184, + "step": 1845 + }, + { + "epoch": 0.1804849432929214, + "grad_norm": 24.515374487750854, + "learning_rate": 5e-05, + "loss": 0.108, + "num_input_tokens_seen": 179041876, + "step": 1846 + }, + { + "epoch": 0.1804849432929214, + "loss": 0.08022254705429077, + "loss_ce": 0.00485939159989357, + "loss_iou": 0.42578125, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 179041876, + "step": 1846 + }, + { + "epoch": 0.18058271411810717, + "grad_norm": 26.46089365931705, + "learning_rate": 5e-05, + "loss": 0.1138, + "num_input_tokens_seen": 179139172, + "step": 1847 + }, + { + "epoch": 0.18058271411810717, + "loss": 0.08945529907941818, + "loss_ce": 0.001702005509287119, + "loss_iou": 0.392578125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 179139172, + "step": 1847 + }, + { + "epoch": 0.18068048494329292, + "grad_norm": 18.020383537751574, + "learning_rate": 5e-05, + "loss": 0.1136, + "num_input_tokens_seen": 179235876, + "step": 1848 + }, + { + "epoch": 0.18068048494329292, + "loss": 0.132444366812706, + "loss_ce": 0.013090127147734165, + "loss_iou": 0.3203125, + "loss_num": 0.02392578125, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 179235876, + "step": 1848 + }, + { + "epoch": 0.1807782557684787, + "grad_norm": 7.582952612819603, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 179331896, + "step": 1849 + }, + { + "epoch": 0.1807782557684787, + "loss": 0.10611013323068619, + "loss_ce": 0.007935083471238613, + "loss_iou": 0.279296875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 179331896, + "step": 1849 + }, + { + "epoch": 0.18087602659366445, + "grad_norm": 5.580406740697505, + "learning_rate": 5e-05, + "loss": 0.0875, + "num_input_tokens_seen": 179428960, + "step": 1850 + }, + { + "epoch": 0.18087602659366445, + "loss": 0.11077439785003662, + "loss_ce": 0.004268051125109196, + "loss_iou": 0.205078125, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 179428960, + "step": 1850 + }, + { + "epoch": 0.18097379741885022, + "grad_norm": 10.514233888657465, + "learning_rate": 5e-05, + "loss": 0.134, + "num_input_tokens_seen": 179525516, + "step": 1851 + }, + { + "epoch": 0.18097379741885022, + "loss": 0.15375857055187225, + "loss_ce": 0.009532498195767403, + "loss_iou": 0.2373046875, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 179525516, + "step": 1851 + }, + { + "epoch": 0.18107156824403597, + "grad_norm": 21.59113386017605, + "learning_rate": 5e-05, + "loss": 0.1172, + "num_input_tokens_seen": 179622124, + "step": 1852 + }, + { + "epoch": 0.18107156824403597, + "loss": 0.09154537320137024, + "loss_ce": 0.00615718774497509, + "loss_iou": 0.2734375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 179622124, + "step": 1852 + }, + { + "epoch": 0.18116933906922175, + "grad_norm": 7.367635073467794, + "learning_rate": 5e-05, + "loss": 0.1179, + "num_input_tokens_seen": 179719032, + "step": 1853 + }, + { + "epoch": 0.18116933906922175, + "loss": 0.12386200577020645, + "loss_ce": 0.006697399541735649, + "loss_iou": 0.283203125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 179719032, + "step": 1853 + }, + { + "epoch": 0.1812671098944075, + "grad_norm": 6.877451790973836, + "learning_rate": 5e-05, + "loss": 0.1226, + "num_input_tokens_seen": 179816476, + "step": 1854 + }, + { + "epoch": 0.1812671098944075, + "loss": 0.08063621819019318, + "loss_ce": 0.006005481351166964, + "loss_iou": 0.38671875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 179816476, + "step": 1854 + }, + { + "epoch": 0.18136488071959328, + "grad_norm": 14.577477871427503, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 179913160, + "step": 1855 + }, + { + "epoch": 0.18136488071959328, + "loss": 0.10547773540019989, + "loss_ce": 0.007874886505305767, + "loss_iou": 0.2197265625, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 179913160, + "step": 1855 + }, + { + "epoch": 0.18146265154477903, + "grad_norm": 3.6478011411986704, + "learning_rate": 5e-05, + "loss": 0.1148, + "num_input_tokens_seen": 180010376, + "step": 1856 + }, + { + "epoch": 0.18146265154477903, + "loss": 0.123380646109581, + "loss_ce": 0.010099392384290695, + "loss_iou": 0.263671875, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 180010376, + "step": 1856 + }, + { + "epoch": 0.1815604223699648, + "grad_norm": 4.104264979343155, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 180106728, + "step": 1857 + }, + { + "epoch": 0.1815604223699648, + "loss": 0.07434185594320297, + "loss_ce": 0.0017405389808118343, + "loss_iou": 0.373046875, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 180106728, + "step": 1857 + }, + { + "epoch": 0.18165819319515056, + "grad_norm": 6.213568071284148, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 180203220, + "step": 1858 + }, + { + "epoch": 0.18165819319515056, + "loss": 0.10405288636684418, + "loss_ce": 0.009692531079053879, + "loss_iou": 0.353515625, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 180203220, + "step": 1858 + }, + { + "epoch": 0.18175596402033634, + "grad_norm": 9.31621724941408, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 180300540, + "step": 1859 + }, + { + "epoch": 0.18175596402033634, + "loss": 0.07852118462324142, + "loss_ce": 0.004245210438966751, + "loss_iou": 0.255859375, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 180300540, + "step": 1859 + }, + { + "epoch": 0.18185373484552209, + "grad_norm": 13.344150109043387, + "learning_rate": 5e-05, + "loss": 0.122, + "num_input_tokens_seen": 180397668, + "step": 1860 + }, + { + "epoch": 0.18185373484552209, + "loss": 0.1145208477973938, + "loss_ce": 0.003894628258422017, + "loss_iou": 0.279296875, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 180397668, + "step": 1860 + }, + { + "epoch": 0.18195150567070786, + "grad_norm": 11.328259709395168, + "learning_rate": 5e-05, + "loss": 0.1131, + "num_input_tokens_seen": 180494632, + "step": 1861 + }, + { + "epoch": 0.18195150567070786, + "loss": 0.153621107339859, + "loss_ce": 0.008906738832592964, + "loss_iou": 0.328125, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 180494632, + "step": 1861 + }, + { + "epoch": 0.1820492764958936, + "grad_norm": 10.83031330983101, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 180590584, + "step": 1862 + }, + { + "epoch": 0.1820492764958936, + "loss": 0.07087401300668716, + "loss_ce": 0.010266103781759739, + "loss_iou": 0.32421875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 180590584, + "step": 1862 + }, + { + "epoch": 0.1821470473210794, + "grad_norm": 22.54346811156804, + "learning_rate": 5e-05, + "loss": 0.1302, + "num_input_tokens_seen": 180687656, + "step": 1863 + }, + { + "epoch": 0.1821470473210794, + "loss": 0.08222506195306778, + "loss_ce": 0.005458089988678694, + "loss_iou": 0.2412109375, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 180687656, + "step": 1863 + }, + { + "epoch": 0.18224481814626514, + "grad_norm": 6.707438095831277, + "learning_rate": 5e-05, + "loss": 0.1252, + "num_input_tokens_seen": 180783888, + "step": 1864 + }, + { + "epoch": 0.18224481814626514, + "loss": 0.0890500545501709, + "loss_ce": 0.0082089863717556, + "loss_iou": 0.2177734375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 180783888, + "step": 1864 + }, + { + "epoch": 0.18234258897145092, + "grad_norm": 5.180780822448548, + "learning_rate": 5e-05, + "loss": 0.1043, + "num_input_tokens_seen": 180880512, + "step": 1865 + }, + { + "epoch": 0.18234258897145092, + "loss": 0.11955071240663528, + "loss_ce": 0.005659112706780434, + "loss_iou": 0.400390625, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 180880512, + "step": 1865 + }, + { + "epoch": 0.1824403597966367, + "grad_norm": 15.246132739678016, + "learning_rate": 5e-05, + "loss": 0.1301, + "num_input_tokens_seen": 180976472, + "step": 1866 + }, + { + "epoch": 0.1824403597966367, + "loss": 0.08553900569677353, + "loss_ce": 0.0036603454500436783, + "loss_iou": 0.3671875, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 180976472, + "step": 1866 + }, + { + "epoch": 0.18253813062182245, + "grad_norm": 14.930970975516255, + "learning_rate": 5e-05, + "loss": 0.1288, + "num_input_tokens_seen": 181073656, + "step": 1867 + }, + { + "epoch": 0.18253813062182245, + "loss": 0.1230277344584465, + "loss_ce": 0.005382474046200514, + "loss_iou": 0.35546875, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 181073656, + "step": 1867 + }, + { + "epoch": 0.18263590144700823, + "grad_norm": 7.687242012925195, + "learning_rate": 5e-05, + "loss": 0.0974, + "num_input_tokens_seen": 181169108, + "step": 1868 + }, + { + "epoch": 0.18263590144700823, + "loss": 0.0858490988612175, + "loss_ce": 0.008227639831602573, + "loss_iou": 0.185546875, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 181169108, + "step": 1868 + }, + { + "epoch": 0.18273367227219398, + "grad_norm": 16.646912937619707, + "learning_rate": 5e-05, + "loss": 0.1116, + "num_input_tokens_seen": 181265296, + "step": 1869 + }, + { + "epoch": 0.18273367227219398, + "loss": 0.11763671040534973, + "loss_ce": 0.00884154997766018, + "loss_iou": 0.369140625, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 181265296, + "step": 1869 + }, + { + "epoch": 0.18283144309737975, + "grad_norm": 17.58786849289122, + "learning_rate": 5e-05, + "loss": 0.109, + "num_input_tokens_seen": 181363148, + "step": 1870 + }, + { + "epoch": 0.18283144309737975, + "loss": 0.10801701247692108, + "loss_ce": 0.0046539753675460815, + "loss_iou": 0.4296875, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 181363148, + "step": 1870 + }, + { + "epoch": 0.1829292139225655, + "grad_norm": 18.41946806295086, + "learning_rate": 5e-05, + "loss": 0.1154, + "num_input_tokens_seen": 181459240, + "step": 1871 + }, + { + "epoch": 0.1829292139225655, + "loss": 0.14862015843391418, + "loss_ce": 0.006774452514946461, + "loss_iou": 0.34375, + "loss_num": 0.0284423828125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 181459240, + "step": 1871 + }, + { + "epoch": 0.18302698474775128, + "grad_norm": 16.158153459935953, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 181556940, + "step": 1872 + }, + { + "epoch": 0.18302698474775128, + "loss": 0.05218534171581268, + "loss_ce": 0.005737588740885258, + "loss_iou": 0.294921875, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 181556940, + "step": 1872 + }, + { + "epoch": 0.18312475557293703, + "grad_norm": 7.400855296259212, + "learning_rate": 5e-05, + "loss": 0.1056, + "num_input_tokens_seen": 181654008, + "step": 1873 + }, + { + "epoch": 0.18312475557293703, + "loss": 0.12090855836868286, + "loss_ce": 0.004369562026113272, + "loss_iou": 0.275390625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 181654008, + "step": 1873 + }, + { + "epoch": 0.1832225263981228, + "grad_norm": 26.55560672785595, + "learning_rate": 5e-05, + "loss": 0.1377, + "num_input_tokens_seen": 181751220, + "step": 1874 + }, + { + "epoch": 0.1832225263981228, + "loss": 0.15802554786205292, + "loss_ce": 0.013479039072990417, + "loss_iou": 0.4609375, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 181751220, + "step": 1874 + }, + { + "epoch": 0.18332029722330856, + "grad_norm": 20.999522349792326, + "learning_rate": 5e-05, + "loss": 0.115, + "num_input_tokens_seen": 181847644, + "step": 1875 + }, + { + "epoch": 0.18332029722330856, + "loss": 0.11300484836101532, + "loss_ce": 0.005399871151894331, + "loss_iou": 0.408203125, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 181847644, + "step": 1875 + }, + { + "epoch": 0.18341806804849434, + "grad_norm": 11.15053512608406, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 181943740, + "step": 1876 + }, + { + "epoch": 0.18341806804849434, + "loss": 0.0983380600810051, + "loss_ce": 0.0058392807841300964, + "loss_iou": 0.419921875, + "loss_num": 0.0185546875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 181943740, + "step": 1876 + }, + { + "epoch": 0.1835158388736801, + "grad_norm": 3.915387028792288, + "learning_rate": 5e-05, + "loss": 0.1107, + "num_input_tokens_seen": 182041008, + "step": 1877 + }, + { + "epoch": 0.1835158388736801, + "loss": 0.08854854851961136, + "loss_ce": 0.007257348392158747, + "loss_iou": 0.32421875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 182041008, + "step": 1877 + }, + { + "epoch": 0.18361360969886587, + "grad_norm": 5.152559702238833, + "learning_rate": 5e-05, + "loss": 0.1078, + "num_input_tokens_seen": 182137968, + "step": 1878 + }, + { + "epoch": 0.18361360969886587, + "loss": 0.0843420922756195, + "loss_ce": 0.0067664142698049545, + "loss_iou": 0.369140625, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 182137968, + "step": 1878 + }, + { + "epoch": 0.18371138052405162, + "grad_norm": 4.966215001706591, + "learning_rate": 5e-05, + "loss": 0.107, + "num_input_tokens_seen": 182235132, + "step": 1879 + }, + { + "epoch": 0.18371138052405162, + "loss": 0.11886890232563019, + "loss_ce": 0.003970225341618061, + "loss_iou": 0.359375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 182235132, + "step": 1879 + }, + { + "epoch": 0.1838091513492374, + "grad_norm": 9.645485137743911, + "learning_rate": 5e-05, + "loss": 0.1275, + "num_input_tokens_seen": 182332100, + "step": 1880 + }, + { + "epoch": 0.1838091513492374, + "loss": 0.11484789848327637, + "loss_ce": 0.004313226323574781, + "loss_iou": 0.44921875, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 182332100, + "step": 1880 + }, + { + "epoch": 0.18390692217442314, + "grad_norm": 13.184460636402333, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 182428932, + "step": 1881 + }, + { + "epoch": 0.18390692217442314, + "loss": 0.08229788392782211, + "loss_ce": 0.004737456329166889, + "loss_iou": 0.3359375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 182428932, + "step": 1881 + }, + { + "epoch": 0.18400469299960892, + "grad_norm": 3.7210016741948677, + "learning_rate": 5e-05, + "loss": 0.1153, + "num_input_tokens_seen": 182525976, + "step": 1882 + }, + { + "epoch": 0.18400469299960892, + "loss": 0.12939530611038208, + "loss_ce": 0.006302655674517155, + "loss_iou": 0.271484375, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 182525976, + "step": 1882 + }, + { + "epoch": 0.18410246382479467, + "grad_norm": 7.51076209201967, + "learning_rate": 5e-05, + "loss": 0.1367, + "num_input_tokens_seen": 182622960, + "step": 1883 + }, + { + "epoch": 0.18410246382479467, + "loss": 0.10651793330907822, + "loss_ce": 0.009990831837058067, + "loss_iou": 0.33203125, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 182622960, + "step": 1883 + }, + { + "epoch": 0.18420023464998045, + "grad_norm": 17.782394702231855, + "learning_rate": 5e-05, + "loss": 0.1232, + "num_input_tokens_seen": 182720580, + "step": 1884 + }, + { + "epoch": 0.18420023464998045, + "loss": 0.10942361503839493, + "loss_ce": 0.00847146287560463, + "loss_iou": 0.376953125, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 182720580, + "step": 1884 + }, + { + "epoch": 0.1842980054751662, + "grad_norm": 16.384321209674315, + "learning_rate": 5e-05, + "loss": 0.1235, + "num_input_tokens_seen": 182817840, + "step": 1885 + }, + { + "epoch": 0.1842980054751662, + "loss": 0.11609513312578201, + "loss_ce": 0.004187175538390875, + "loss_iou": 0.1982421875, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 182817840, + "step": 1885 + }, + { + "epoch": 0.18439577630035198, + "grad_norm": 12.75068148455661, + "learning_rate": 5e-05, + "loss": 0.0911, + "num_input_tokens_seen": 182913720, + "step": 1886 + }, + { + "epoch": 0.18439577630035198, + "loss": 0.09781770408153534, + "loss_ce": 0.0019162154057994485, + "loss_iou": 0.2490234375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 182913720, + "step": 1886 + }, + { + "epoch": 0.18449354712553773, + "grad_norm": 4.717798091156198, + "learning_rate": 5e-05, + "loss": 0.0927, + "num_input_tokens_seen": 183010312, + "step": 1887 + }, + { + "epoch": 0.18449354712553773, + "loss": 0.10180000960826874, + "loss_ce": 0.002068563597276807, + "loss_iou": 0.34765625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 183010312, + "step": 1887 + }, + { + "epoch": 0.1845913179507235, + "grad_norm": 14.564761863772544, + "learning_rate": 5e-05, + "loss": 0.1112, + "num_input_tokens_seen": 183107932, + "step": 1888 + }, + { + "epoch": 0.1845913179507235, + "loss": 0.07030557841062546, + "loss_ce": 0.004875891841948032, + "loss_iou": 0.310546875, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 183107932, + "step": 1888 + }, + { + "epoch": 0.18468908877590928, + "grad_norm": 9.80317346663764, + "learning_rate": 5e-05, + "loss": 0.1121, + "num_input_tokens_seen": 183205484, + "step": 1889 + }, + { + "epoch": 0.18468908877590928, + "loss": 0.05934704467654228, + "loss_ce": 0.0029658188577741385, + "loss_iou": 0.34765625, + "loss_num": 0.01123046875, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 183205484, + "step": 1889 + }, + { + "epoch": 0.18478685960109503, + "grad_norm": 13.008403297373162, + "learning_rate": 5e-05, + "loss": 0.1058, + "num_input_tokens_seen": 183302588, + "step": 1890 + }, + { + "epoch": 0.18478685960109503, + "loss": 0.08644628524780273, + "loss_ce": 0.006917478982359171, + "loss_iou": 0.39453125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 183302588, + "step": 1890 + }, + { + "epoch": 0.1848846304262808, + "grad_norm": 3.7195769344057075, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 183399912, + "step": 1891 + }, + { + "epoch": 0.1848846304262808, + "loss": 0.06928794085979462, + "loss_ce": 0.0038506234996020794, + "loss_iou": 0.42578125, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 183399912, + "step": 1891 + }, + { + "epoch": 0.18498240125146656, + "grad_norm": 9.869299122251155, + "learning_rate": 5e-05, + "loss": 0.1007, + "num_input_tokens_seen": 183496232, + "step": 1892 + }, + { + "epoch": 0.18498240125146656, + "loss": 0.08618765324354172, + "loss_ce": 0.006613069213926792, + "loss_iou": 0.2578125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 183496232, + "step": 1892 + }, + { + "epoch": 0.18508017207665234, + "grad_norm": 8.136617584293829, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 183593104, + "step": 1893 + }, + { + "epoch": 0.18508017207665234, + "loss": 0.0778283178806305, + "loss_ce": 0.004433548077940941, + "loss_iou": 0.2890625, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 183593104, + "step": 1893 + }, + { + "epoch": 0.1851779429018381, + "grad_norm": 23.106034848157666, + "learning_rate": 5e-05, + "loss": 0.121, + "num_input_tokens_seen": 183689424, + "step": 1894 + }, + { + "epoch": 0.1851779429018381, + "loss": 0.10237808525562286, + "loss_ce": 0.00593491131439805, + "loss_iou": 0.380859375, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 183689424, + "step": 1894 + }, + { + "epoch": 0.18527571372702387, + "grad_norm": 10.471570432469873, + "learning_rate": 5e-05, + "loss": 0.0973, + "num_input_tokens_seen": 183787084, + "step": 1895 + }, + { + "epoch": 0.18527571372702387, + "loss": 0.09643152356147766, + "loss_ce": 0.001308227190747857, + "loss_iou": 0.404296875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 183787084, + "step": 1895 + }, + { + "epoch": 0.18537348455220962, + "grad_norm": 13.471505137158028, + "learning_rate": 5e-05, + "loss": 0.1555, + "num_input_tokens_seen": 183884992, + "step": 1896 + }, + { + "epoch": 0.18537348455220962, + "loss": 0.14122194051742554, + "loss_ce": 0.004564235918223858, + "loss_iou": 0.443359375, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 183884992, + "step": 1896 + }, + { + "epoch": 0.1854712553773954, + "grad_norm": 8.680811411546044, + "learning_rate": 5e-05, + "loss": 0.1139, + "num_input_tokens_seen": 183982232, + "step": 1897 + }, + { + "epoch": 0.1854712553773954, + "loss": 0.11658872663974762, + "loss_ce": 0.007183214649558067, + "loss_iou": 0.41015625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 183982232, + "step": 1897 + }, + { + "epoch": 0.18556902620258114, + "grad_norm": 8.963458351141748, + "learning_rate": 5e-05, + "loss": 0.1259, + "num_input_tokens_seen": 184079628, + "step": 1898 + }, + { + "epoch": 0.18556902620258114, + "loss": 0.1167830377817154, + "loss_ce": 0.004814040381461382, + "loss_iou": 0.478515625, + "loss_num": 0.0224609375, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 184079628, + "step": 1898 + }, + { + "epoch": 0.18566679702776692, + "grad_norm": 19.87705110049646, + "learning_rate": 5e-05, + "loss": 0.0911, + "num_input_tokens_seen": 184176748, + "step": 1899 + }, + { + "epoch": 0.18566679702776692, + "loss": 0.0652199536561966, + "loss_ce": 0.0015831805067136884, + "loss_iou": 0.322265625, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 184176748, + "step": 1899 + }, + { + "epoch": 0.18576456785295267, + "grad_norm": 11.897944106129797, + "learning_rate": 5e-05, + "loss": 0.1276, + "num_input_tokens_seen": 184273756, + "step": 1900 + }, + { + "epoch": 0.18576456785295267, + "loss": 0.050237465649843216, + "loss_ce": 0.00495701190084219, + "loss_iou": 0.2490234375, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 184273756, + "step": 1900 + }, + { + "epoch": 0.18586233867813845, + "grad_norm": 3.4344640266400908, + "learning_rate": 5e-05, + "loss": 0.0925, + "num_input_tokens_seen": 184370984, + "step": 1901 + }, + { + "epoch": 0.18586233867813845, + "loss": 0.06600992381572723, + "loss_ce": 0.009186194278299809, + "loss_iou": 0.470703125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 184370984, + "step": 1901 + }, + { + "epoch": 0.1859601095033242, + "grad_norm": 8.587388171817112, + "learning_rate": 5e-05, + "loss": 0.1282, + "num_input_tokens_seen": 184468096, + "step": 1902 + }, + { + "epoch": 0.1859601095033242, + "loss": 0.14214658737182617, + "loss_ce": 0.006831648759543896, + "loss_iou": 0.3671875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 184468096, + "step": 1902 + }, + { + "epoch": 0.18605788032850998, + "grad_norm": 6.085812087460709, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 184565308, + "step": 1903 + }, + { + "epoch": 0.18605788032850998, + "loss": 0.052680522203445435, + "loss_ce": 0.003932508639991283, + "loss_iou": 0.259765625, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 184565308, + "step": 1903 + }, + { + "epoch": 0.18615565115369573, + "grad_norm": 14.33494248633876, + "learning_rate": 5e-05, + "loss": 0.134, + "num_input_tokens_seen": 184662312, + "step": 1904 + }, + { + "epoch": 0.18615565115369573, + "loss": 0.14375874400138855, + "loss_ce": 0.006277048960328102, + "loss_iou": 0.41015625, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 184662312, + "step": 1904 + }, + { + "epoch": 0.1862534219788815, + "grad_norm": 4.3231258112788, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 184758556, + "step": 1905 + }, + { + "epoch": 0.1862534219788815, + "loss": 0.07277750968933105, + "loss_ce": 0.004044295288622379, + "loss_iou": 0.27734375, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 184758556, + "step": 1905 + }, + { + "epoch": 0.18635119280406726, + "grad_norm": 8.021052383177901, + "learning_rate": 5e-05, + "loss": 0.0513, + "num_input_tokens_seen": 184855952, + "step": 1906 + }, + { + "epoch": 0.18635119280406726, + "loss": 0.05841279774904251, + "loss_ce": 0.0035574501380324364, + "loss_iou": 0.43359375, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 184855952, + "step": 1906 + }, + { + "epoch": 0.18644896362925303, + "grad_norm": 2.7244746598061824, + "learning_rate": 5e-05, + "loss": 0.1072, + "num_input_tokens_seen": 184952556, + "step": 1907 + }, + { + "epoch": 0.18644896362925303, + "loss": 0.12938261032104492, + "loss_ce": 0.005038730800151825, + "loss_iou": 0.28125, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 184952556, + "step": 1907 + }, + { + "epoch": 0.18654673445443878, + "grad_norm": 3.4983681557556894, + "learning_rate": 5e-05, + "loss": 0.0677, + "num_input_tokens_seen": 185048956, + "step": 1908 + }, + { + "epoch": 0.18654673445443878, + "loss": 0.08612751960754395, + "loss_ce": 0.004950758535414934, + "loss_iou": 0.3125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 185048956, + "step": 1908 + }, + { + "epoch": 0.18664450527962456, + "grad_norm": 6.382770055317294, + "learning_rate": 5e-05, + "loss": 0.0858, + "num_input_tokens_seen": 185146088, + "step": 1909 + }, + { + "epoch": 0.18664450527962456, + "loss": 0.08054380118846893, + "loss_ce": 0.0033648479729890823, + "loss_iou": 0.337890625, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 185146088, + "step": 1909 + }, + { + "epoch": 0.1867422761048103, + "grad_norm": 9.77501016537137, + "learning_rate": 5e-05, + "loss": 0.1325, + "num_input_tokens_seen": 185242564, + "step": 1910 + }, + { + "epoch": 0.1867422761048103, + "loss": 0.12958532571792603, + "loss_ce": 0.00522620091214776, + "loss_iou": 0.30078125, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 185242564, + "step": 1910 + }, + { + "epoch": 0.1868400469299961, + "grad_norm": 41.73598085155693, + "learning_rate": 5e-05, + "loss": 0.1048, + "num_input_tokens_seen": 185340628, + "step": 1911 + }, + { + "epoch": 0.1868400469299961, + "loss": 0.11833158880472183, + "loss_ce": 0.0020596194081008434, + "loss_iou": 0.41796875, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 185340628, + "step": 1911 + }, + { + "epoch": 0.18693781775518187, + "grad_norm": 6.029754544797978, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 185438056, + "step": 1912 + }, + { + "epoch": 0.18693781775518187, + "loss": 0.10007084906101227, + "loss_ce": 0.0032385713420808315, + "loss_iou": 0.3984375, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 185438056, + "step": 1912 + }, + { + "epoch": 0.18703558858036762, + "grad_norm": 4.740435949109474, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 185535480, + "step": 1913 + }, + { + "epoch": 0.18703558858036762, + "loss": 0.07786108553409576, + "loss_ce": 0.004176396876573563, + "loss_iou": 0.365234375, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 185535480, + "step": 1913 + }, + { + "epoch": 0.1871333594055534, + "grad_norm": 12.621865867639444, + "learning_rate": 5e-05, + "loss": 0.1495, + "num_input_tokens_seen": 185631584, + "step": 1914 + }, + { + "epoch": 0.1871333594055534, + "loss": 0.17120468616485596, + "loss_ce": 0.008177886717021465, + "loss_iou": 0.287109375, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 185631584, + "step": 1914 + }, + { + "epoch": 0.18723113023073915, + "grad_norm": 17.959305772987346, + "learning_rate": 5e-05, + "loss": 0.1151, + "num_input_tokens_seen": 185728968, + "step": 1915 + }, + { + "epoch": 0.18723113023073915, + "loss": 0.09522763639688492, + "loss_ce": 0.008267796598374844, + "loss_iou": 0.37890625, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 185728968, + "step": 1915 + }, + { + "epoch": 0.18732890105592492, + "grad_norm": 39.364354360616744, + "learning_rate": 5e-05, + "loss": 0.1225, + "num_input_tokens_seen": 185825264, + "step": 1916 + }, + { + "epoch": 0.18732890105592492, + "loss": 0.12741462886333466, + "loss_ce": 0.00720588956028223, + "loss_iou": 0.3359375, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 185825264, + "step": 1916 + }, + { + "epoch": 0.18742667188111067, + "grad_norm": 18.532198904436612, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 185922636, + "step": 1917 + }, + { + "epoch": 0.18742667188111067, + "loss": 0.07213903963565826, + "loss_ce": 0.003558417549356818, + "loss_iou": 0.345703125, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 185922636, + "step": 1917 + }, + { + "epoch": 0.18752444270629645, + "grad_norm": 5.471518182516024, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 186019596, + "step": 1918 + }, + { + "epoch": 0.18752444270629645, + "loss": 0.06843014806509018, + "loss_ce": 0.00434323213994503, + "loss_iou": 0.306640625, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 186019596, + "step": 1918 + }, + { + "epoch": 0.1876222135314822, + "grad_norm": 12.351764093725373, + "learning_rate": 5e-05, + "loss": 0.0959, + "num_input_tokens_seen": 186115872, + "step": 1919 + }, + { + "epoch": 0.1876222135314822, + "loss": 0.09658308327198029, + "loss_ce": 0.007899006828665733, + "loss_iou": 0.265625, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 186115872, + "step": 1919 + }, + { + "epoch": 0.18771998435666798, + "grad_norm": 3.5921020335315186, + "learning_rate": 5e-05, + "loss": 0.1105, + "num_input_tokens_seen": 186213088, + "step": 1920 + }, + { + "epoch": 0.18771998435666798, + "loss": 0.10302157700061798, + "loss_ce": 0.005105921067297459, + "loss_iou": 0.26171875, + "loss_num": 0.01953125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 186213088, + "step": 1920 + }, + { + "epoch": 0.18781775518185373, + "grad_norm": 18.032634751271075, + "learning_rate": 5e-05, + "loss": 0.1183, + "num_input_tokens_seen": 186310448, + "step": 1921 + }, + { + "epoch": 0.18781775518185373, + "loss": 0.1463807225227356, + "loss_ce": 0.01024180743843317, + "loss_iou": 0.451171875, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 186310448, + "step": 1921 + }, + { + "epoch": 0.1879155260070395, + "grad_norm": 12.555870075589343, + "learning_rate": 5e-05, + "loss": 0.1248, + "num_input_tokens_seen": 186407524, + "step": 1922 + }, + { + "epoch": 0.1879155260070395, + "loss": 0.14792652428150177, + "loss_ce": 0.008461199700832367, + "loss_iou": 0.341796875, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 186407524, + "step": 1922 + }, + { + "epoch": 0.18801329683222526, + "grad_norm": 19.25276655246831, + "learning_rate": 5e-05, + "loss": 0.1059, + "num_input_tokens_seen": 186504684, + "step": 1923 + }, + { + "epoch": 0.18801329683222526, + "loss": 0.12727445363998413, + "loss_ce": 0.00889676995575428, + "loss_iou": 0.44921875, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 186504684, + "step": 1923 + }, + { + "epoch": 0.18811106765741104, + "grad_norm": 9.448650285637592, + "learning_rate": 5e-05, + "loss": 0.1255, + "num_input_tokens_seen": 186601660, + "step": 1924 + }, + { + "epoch": 0.18811106765741104, + "loss": 0.14406944811344147, + "loss_ce": 0.004832991398870945, + "loss_iou": 0.38671875, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 186601660, + "step": 1924 + }, + { + "epoch": 0.18820883848259679, + "grad_norm": 6.915408261122285, + "learning_rate": 5e-05, + "loss": 0.1065, + "num_input_tokens_seen": 186698912, + "step": 1925 + }, + { + "epoch": 0.18820883848259679, + "loss": 0.07854815572500229, + "loss_ce": 0.0016896361485123634, + "loss_iou": 0.44140625, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 186698912, + "step": 1925 + }, + { + "epoch": 0.18830660930778256, + "grad_norm": 5.525692592123318, + "learning_rate": 5e-05, + "loss": 0.1133, + "num_input_tokens_seen": 186795860, + "step": 1926 + }, + { + "epoch": 0.18830660930778256, + "loss": 0.12449048459529877, + "loss_ce": 0.0054261526092886925, + "loss_iou": 0.3203125, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 186795860, + "step": 1926 + }, + { + "epoch": 0.1884043801329683, + "grad_norm": 20.296433069764483, + "learning_rate": 5e-05, + "loss": 0.118, + "num_input_tokens_seen": 186893476, + "step": 1927 + }, + { + "epoch": 0.1884043801329683, + "loss": 0.10829991847276688, + "loss_ce": 0.005501459818333387, + "loss_iou": 0.341796875, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 186893476, + "step": 1927 + }, + { + "epoch": 0.1885021509581541, + "grad_norm": 25.641500692412436, + "learning_rate": 5e-05, + "loss": 0.1168, + "num_input_tokens_seen": 186990560, + "step": 1928 + }, + { + "epoch": 0.1885021509581541, + "loss": 0.13127954304218292, + "loss_ce": 0.009178709238767624, + "loss_iou": 0.35546875, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 186990560, + "step": 1928 + }, + { + "epoch": 0.18859992178333984, + "grad_norm": 9.725479918657136, + "learning_rate": 5e-05, + "loss": 0.0969, + "num_input_tokens_seen": 187086980, + "step": 1929 + }, + { + "epoch": 0.18859992178333984, + "loss": 0.1229504942893982, + "loss_ce": 0.00796025712043047, + "loss_iou": 0.392578125, + "loss_num": 0.02294921875, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 187086980, + "step": 1929 + }, + { + "epoch": 0.18869769260852562, + "grad_norm": 2.6161754611421815, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 187183824, + "step": 1930 + }, + { + "epoch": 0.18869769260852562, + "loss": 0.0858367532491684, + "loss_ce": 0.004522666800767183, + "loss_iou": 0.314453125, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 187183824, + "step": 1930 + }, + { + "epoch": 0.18879546343371137, + "grad_norm": 4.409283809989938, + "learning_rate": 5e-05, + "loss": 0.123, + "num_input_tokens_seen": 187281392, + "step": 1931 + }, + { + "epoch": 0.18879546343371137, + "loss": 0.09751676023006439, + "loss_ce": 0.006574379280209541, + "loss_iou": 0.28515625, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 187281392, + "step": 1931 + }, + { + "epoch": 0.18889323425889715, + "grad_norm": 1.8168601189642488, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 187378388, + "step": 1932 + }, + { + "epoch": 0.18889323425889715, + "loss": 0.07679016888141632, + "loss_ce": 0.004516916815191507, + "loss_iou": 0.322265625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 187378388, + "step": 1932 + }, + { + "epoch": 0.1889910050840829, + "grad_norm": 5.318843436062133, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 187475756, + "step": 1933 + }, + { + "epoch": 0.1889910050840829, + "loss": 0.11190501600503922, + "loss_ce": 0.0037507168017327785, + "loss_iou": 0.32421875, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 187475756, + "step": 1933 + }, + { + "epoch": 0.18908877590926867, + "grad_norm": 4.577897609279506, + "learning_rate": 5e-05, + "loss": 0.0911, + "num_input_tokens_seen": 187572548, + "step": 1934 + }, + { + "epoch": 0.18908877590926867, + "loss": 0.06979332119226456, + "loss_ce": 0.002578356768935919, + "loss_iou": 0.34375, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 187572548, + "step": 1934 + }, + { + "epoch": 0.18918654673445442, + "grad_norm": 7.005089279842274, + "learning_rate": 5e-05, + "loss": 0.1116, + "num_input_tokens_seen": 187669544, + "step": 1935 + }, + { + "epoch": 0.18918654673445442, + "loss": 0.13939660787582397, + "loss_ce": 0.011955207213759422, + "loss_iou": 0.337890625, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 187669544, + "step": 1935 + }, + { + "epoch": 0.1892843175596402, + "grad_norm": 11.71023049338863, + "learning_rate": 5e-05, + "loss": 0.1237, + "num_input_tokens_seen": 187767288, + "step": 1936 + }, + { + "epoch": 0.1892843175596402, + "loss": 0.12186249345541, + "loss_ce": 0.004858097061514854, + "loss_iou": 0.4453125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 187767288, + "step": 1936 + }, + { + "epoch": 0.18938208838482598, + "grad_norm": 12.358801265462716, + "learning_rate": 5e-05, + "loss": 0.1389, + "num_input_tokens_seen": 187864192, + "step": 1937 + }, + { + "epoch": 0.18938208838482598, + "loss": 0.1120406910777092, + "loss_ce": 0.004428079351782799, + "loss_iou": 0.30859375, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 187864192, + "step": 1937 + }, + { + "epoch": 0.18947985921001173, + "grad_norm": 17.675372707432516, + "learning_rate": 5e-05, + "loss": 0.1772, + "num_input_tokens_seen": 187960984, + "step": 1938 + }, + { + "epoch": 0.18947985921001173, + "loss": 0.242363840341568, + "loss_ce": 0.01403132639825344, + "loss_iou": 0.259765625, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 187960984, + "step": 1938 + }, + { + "epoch": 0.1895776300351975, + "grad_norm": 10.007466441779574, + "learning_rate": 5e-05, + "loss": 0.1176, + "num_input_tokens_seen": 188058780, + "step": 1939 + }, + { + "epoch": 0.1895776300351975, + "loss": 0.1184556633234024, + "loss_ce": 0.00410629715770483, + "loss_iou": 0.4375, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 188058780, + "step": 1939 + }, + { + "epoch": 0.18967540086038326, + "grad_norm": 4.161136855175811, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 188156200, + "step": 1940 + }, + { + "epoch": 0.18967540086038326, + "loss": 0.06994481384754181, + "loss_ce": 0.008390855975449085, + "loss_iou": 0.3125, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 188156200, + "step": 1940 + }, + { + "epoch": 0.18977317168556904, + "grad_norm": 12.795937573708926, + "learning_rate": 5e-05, + "loss": 0.1196, + "num_input_tokens_seen": 188252508, + "step": 1941 + }, + { + "epoch": 0.18977317168556904, + "loss": 0.11462757736444473, + "loss_ce": 0.004523967858403921, + "loss_iou": 0.3671875, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 188252508, + "step": 1941 + }, + { + "epoch": 0.1898709425107548, + "grad_norm": 41.801672419199875, + "learning_rate": 5e-05, + "loss": 0.1158, + "num_input_tokens_seen": 188349440, + "step": 1942 + }, + { + "epoch": 0.1898709425107548, + "loss": 0.12903207540512085, + "loss_ce": 0.008426605723798275, + "loss_iou": 0.271484375, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 188349440, + "step": 1942 + }, + { + "epoch": 0.18996871333594056, + "grad_norm": 24.811829713752566, + "learning_rate": 5e-05, + "loss": 0.1509, + "num_input_tokens_seen": 188446088, + "step": 1943 + }, + { + "epoch": 0.18996871333594056, + "loss": 0.16372546553611755, + "loss_ce": 0.0034318806137889624, + "loss_iou": 0.41796875, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 188446088, + "step": 1943 + }, + { + "epoch": 0.19006648416112631, + "grad_norm": 16.961234605558992, + "learning_rate": 5e-05, + "loss": 0.192, + "num_input_tokens_seen": 188543620, + "step": 1944 + }, + { + "epoch": 0.19006648416112631, + "loss": 0.24130967259407043, + "loss_ce": 0.01020005065947771, + "loss_iou": 0.50390625, + "loss_num": 0.046142578125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 188543620, + "step": 1944 + }, + { + "epoch": 0.1901642549863121, + "grad_norm": 13.946123160348291, + "learning_rate": 5e-05, + "loss": 0.1102, + "num_input_tokens_seen": 188641004, + "step": 1945 + }, + { + "epoch": 0.1901642549863121, + "loss": 0.07748112082481384, + "loss_ce": 0.0037506558001041412, + "loss_iou": 0.41796875, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 188641004, + "step": 1945 + }, + { + "epoch": 0.19026202581149784, + "grad_norm": 4.440612195733634, + "learning_rate": 5e-05, + "loss": 0.1191, + "num_input_tokens_seen": 188736816, + "step": 1946 + }, + { + "epoch": 0.19026202581149784, + "loss": 0.11918679624795914, + "loss_ce": 0.0028843015898019075, + "loss_iou": 0.390625, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 188736816, + "step": 1946 + }, + { + "epoch": 0.19035979663668362, + "grad_norm": 5.070096280103271, + "learning_rate": 5e-05, + "loss": 0.0896, + "num_input_tokens_seen": 188833996, + "step": 1947 + }, + { + "epoch": 0.19035979663668362, + "loss": 0.07489858567714691, + "loss_ce": 0.0038231504149734974, + "loss_iou": 0.400390625, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 188833996, + "step": 1947 + }, + { + "epoch": 0.19045756746186937, + "grad_norm": 4.443874375507286, + "learning_rate": 5e-05, + "loss": 0.1361, + "num_input_tokens_seen": 188931000, + "step": 1948 + }, + { + "epoch": 0.19045756746186937, + "loss": 0.16874966025352478, + "loss_ce": 0.0030697297770529985, + "loss_iou": 0.423828125, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 188931000, + "step": 1948 + }, + { + "epoch": 0.19055533828705515, + "grad_norm": 7.159401062108069, + "learning_rate": 5e-05, + "loss": 0.0905, + "num_input_tokens_seen": 189028072, + "step": 1949 + }, + { + "epoch": 0.19055533828705515, + "loss": 0.10563002526760101, + "loss_ce": 0.008782494813203812, + "loss_iou": 0.283203125, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 189028072, + "step": 1949 + }, + { + "epoch": 0.1906531091122409, + "grad_norm": 8.026923763111428, + "learning_rate": 5e-05, + "loss": 0.1156, + "num_input_tokens_seen": 189124448, + "step": 1950 + }, + { + "epoch": 0.1906531091122409, + "loss": 0.14869791269302368, + "loss_ce": 0.008317047730088234, + "loss_iou": 0.2451171875, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 189124448, + "step": 1950 + }, + { + "epoch": 0.19075087993742668, + "grad_norm": 9.886884001281494, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 189221620, + "step": 1951 + }, + { + "epoch": 0.19075087993742668, + "loss": 0.0832538828253746, + "loss_ce": 0.006410623900592327, + "loss_iou": 0.31640625, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 189221620, + "step": 1951 + }, + { + "epoch": 0.19084865076261243, + "grad_norm": 5.126929823200711, + "learning_rate": 5e-05, + "loss": 0.0966, + "num_input_tokens_seen": 189319252, + "step": 1952 + }, + { + "epoch": 0.19084865076261243, + "loss": 0.08093389123678207, + "loss_ce": 0.005433402955532074, + "loss_iou": 0.306640625, + "loss_num": 0.01507568359375, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 189319252, + "step": 1952 + }, + { + "epoch": 0.1909464215877982, + "grad_norm": 6.203792811117276, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 189415828, + "step": 1953 + }, + { + "epoch": 0.1909464215877982, + "loss": 0.07934507727622986, + "loss_ce": 0.0028069852851331234, + "loss_iou": 0.37890625, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 189415828, + "step": 1953 + }, + { + "epoch": 0.19104419241298395, + "grad_norm": 5.441935749581308, + "learning_rate": 5e-05, + "loss": 0.1564, + "num_input_tokens_seen": 189513460, + "step": 1954 + }, + { + "epoch": 0.19104419241298395, + "loss": 0.1451098620891571, + "loss_ce": 0.0045764087699353695, + "loss_iou": 0.408203125, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 189513460, + "step": 1954 + }, + { + "epoch": 0.19114196323816973, + "grad_norm": 13.386790614923619, + "learning_rate": 5e-05, + "loss": 0.1147, + "num_input_tokens_seen": 189610060, + "step": 1955 + }, + { + "epoch": 0.19114196323816973, + "loss": 0.12741681933403015, + "loss_ce": 0.0061094509437680244, + "loss_iou": 0.4609375, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 189610060, + "step": 1955 + }, + { + "epoch": 0.19123973406335548, + "grad_norm": 27.9782495231682, + "learning_rate": 5e-05, + "loss": 0.1369, + "num_input_tokens_seen": 189706928, + "step": 1956 + }, + { + "epoch": 0.19123973406335548, + "loss": 0.15811070799827576, + "loss_ce": 0.006194213405251503, + "loss_iou": 0.4375, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 189706928, + "step": 1956 + }, + { + "epoch": 0.19133750488854126, + "grad_norm": 13.825398579247265, + "learning_rate": 5e-05, + "loss": 0.1295, + "num_input_tokens_seen": 189803600, + "step": 1957 + }, + { + "epoch": 0.19133750488854126, + "loss": 0.11243486404418945, + "loss_ce": 0.006660935468971729, + "loss_iou": 0.353515625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 189803600, + "step": 1957 + }, + { + "epoch": 0.191435275713727, + "grad_norm": 10.76714356893809, + "learning_rate": 5e-05, + "loss": 0.1372, + "num_input_tokens_seen": 189900852, + "step": 1958 + }, + { + "epoch": 0.191435275713727, + "loss": 0.15087434649467468, + "loss_ce": 0.007746894843876362, + "loss_iou": 0.404296875, + "loss_num": 0.028564453125, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 189900852, + "step": 1958 + }, + { + "epoch": 0.1915330465389128, + "grad_norm": 10.867719837213352, + "learning_rate": 5e-05, + "loss": 0.0905, + "num_input_tokens_seen": 189996836, + "step": 1959 + }, + { + "epoch": 0.1915330465389128, + "loss": 0.10854437947273254, + "loss_ce": 0.005753546021878719, + "loss_iou": 0.3984375, + "loss_num": 0.0205078125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 189996836, + "step": 1959 + }, + { + "epoch": 0.19163081736409857, + "grad_norm": 11.469395311755845, + "learning_rate": 5e-05, + "loss": 0.1257, + "num_input_tokens_seen": 190094392, + "step": 1960 + }, + { + "epoch": 0.19163081736409857, + "loss": 0.10379733145236969, + "loss_ce": 0.007453331723809242, + "loss_iou": 0.44921875, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 190094392, + "step": 1960 + }, + { + "epoch": 0.19172858818928432, + "grad_norm": 39.3330008561992, + "learning_rate": 5e-05, + "loss": 0.1021, + "num_input_tokens_seen": 190191396, + "step": 1961 + }, + { + "epoch": 0.19172858818928432, + "loss": 0.0707734227180481, + "loss_ce": 0.005008043721318245, + "loss_iou": 0.318359375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 190191396, + "step": 1961 + }, + { + "epoch": 0.1918263590144701, + "grad_norm": 8.504775650453597, + "learning_rate": 5e-05, + "loss": 0.1417, + "num_input_tokens_seen": 190288056, + "step": 1962 + }, + { + "epoch": 0.1918263590144701, + "loss": 0.11536978930234909, + "loss_ce": 0.010084143839776516, + "loss_iou": 0.2060546875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 190288056, + "step": 1962 + }, + { + "epoch": 0.19192412983965584, + "grad_norm": 4.6762642749244625, + "learning_rate": 5e-05, + "loss": 0.0881, + "num_input_tokens_seen": 190384712, + "step": 1963 + }, + { + "epoch": 0.19192412983965584, + "loss": 0.06489051878452301, + "loss_ce": 0.007898936048150063, + "loss_iou": 0.29296875, + "loss_num": 0.01141357421875, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 190384712, + "step": 1963 + }, + { + "epoch": 0.19202190066484162, + "grad_norm": 6.2897658653119555, + "learning_rate": 5e-05, + "loss": 0.1093, + "num_input_tokens_seen": 190480520, + "step": 1964 + }, + { + "epoch": 0.19202190066484162, + "loss": 0.10298052430152893, + "loss_ce": 0.0065907565876841545, + "loss_iou": 0.2001953125, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 190480520, + "step": 1964 + }, + { + "epoch": 0.19211967149002737, + "grad_norm": 13.544333197310898, + "learning_rate": 5e-05, + "loss": 0.1432, + "num_input_tokens_seen": 190578128, + "step": 1965 + }, + { + "epoch": 0.19211967149002737, + "loss": 0.18038424849510193, + "loss_ce": 0.005823691841214895, + "loss_iou": 0.3671875, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 190578128, + "step": 1965 + }, + { + "epoch": 0.19221744231521315, + "grad_norm": 22.310373898473497, + "learning_rate": 5e-05, + "loss": 0.1371, + "num_input_tokens_seen": 190674272, + "step": 1966 + }, + { + "epoch": 0.19221744231521315, + "loss": 0.11388121545314789, + "loss_ce": 0.010029900819063187, + "loss_iou": 0.2421875, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 190674272, + "step": 1966 + }, + { + "epoch": 0.1923152131403989, + "grad_norm": 5.557468823922316, + "learning_rate": 5e-05, + "loss": 0.1312, + "num_input_tokens_seen": 190770580, + "step": 1967 + }, + { + "epoch": 0.1923152131403989, + "loss": 0.10652988404035568, + "loss_ce": 0.009804422967135906, + "loss_iou": 0.2216796875, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 190770580, + "step": 1967 + }, + { + "epoch": 0.19241298396558468, + "grad_norm": 12.473044296675258, + "learning_rate": 5e-05, + "loss": 0.1056, + "num_input_tokens_seen": 190867532, + "step": 1968 + }, + { + "epoch": 0.19241298396558468, + "loss": 0.12120772898197174, + "loss_ce": 0.00705672986805439, + "loss_iou": 0.388671875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 190867532, + "step": 1968 + }, + { + "epoch": 0.19251075479077043, + "grad_norm": 21.27457891869079, + "learning_rate": 5e-05, + "loss": 0.1658, + "num_input_tokens_seen": 190963728, + "step": 1969 + }, + { + "epoch": 0.19251075479077043, + "loss": 0.11616391688585281, + "loss_ce": 0.008375834673643112, + "loss_iou": 0.40625, + "loss_num": 0.021484375, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 190963728, + "step": 1969 + }, + { + "epoch": 0.1926085256159562, + "grad_norm": 18.184925557359033, + "learning_rate": 5e-05, + "loss": 0.1521, + "num_input_tokens_seen": 191061068, + "step": 1970 + }, + { + "epoch": 0.1926085256159562, + "loss": 0.1475268304347992, + "loss_ce": 0.010777560994029045, + "loss_iou": 0.390625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 191061068, + "step": 1970 + }, + { + "epoch": 0.19270629644114196, + "grad_norm": 13.573596857870019, + "learning_rate": 5e-05, + "loss": 0.1165, + "num_input_tokens_seen": 191157748, + "step": 1971 + }, + { + "epoch": 0.19270629644114196, + "loss": 0.15712712705135345, + "loss_ce": 0.010093430988490582, + "loss_iou": 0.298828125, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 191157748, + "step": 1971 + }, + { + "epoch": 0.19280406726632773, + "grad_norm": 7.402564751776086, + "learning_rate": 5e-05, + "loss": 0.1224, + "num_input_tokens_seen": 191254216, + "step": 1972 + }, + { + "epoch": 0.19280406726632773, + "loss": 0.12078069150447845, + "loss_ce": 0.004752859938889742, + "loss_iou": 0.330078125, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 191254216, + "step": 1972 + }, + { + "epoch": 0.19290183809151348, + "grad_norm": 11.022833543990677, + "learning_rate": 5e-05, + "loss": 0.1007, + "num_input_tokens_seen": 191350128, + "step": 1973 + }, + { + "epoch": 0.19290183809151348, + "loss": 0.10691957920789719, + "loss_ce": 0.007558159530162811, + "loss_iou": 0.19140625, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 191350128, + "step": 1973 + }, + { + "epoch": 0.19299960891669926, + "grad_norm": 33.98384473627982, + "learning_rate": 5e-05, + "loss": 0.0932, + "num_input_tokens_seen": 191448252, + "step": 1974 + }, + { + "epoch": 0.19299960891669926, + "loss": 0.11060110479593277, + "loss_ce": 0.005437529645860195, + "loss_iou": 0.396484375, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 191448252, + "step": 1974 + }, + { + "epoch": 0.193097379741885, + "grad_norm": 15.927197855502648, + "learning_rate": 5e-05, + "loss": 0.1168, + "num_input_tokens_seen": 191545048, + "step": 1975 + }, + { + "epoch": 0.193097379741885, + "loss": 0.12491748481988907, + "loss_ce": 0.004815548192709684, + "loss_iou": 0.24609375, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 191545048, + "step": 1975 + }, + { + "epoch": 0.1931951505670708, + "grad_norm": 21.797420538247497, + "learning_rate": 5e-05, + "loss": 0.1027, + "num_input_tokens_seen": 191642564, + "step": 1976 + }, + { + "epoch": 0.1931951505670708, + "loss": 0.12680506706237793, + "loss_ce": 0.00629115616902709, + "loss_iou": 0.322265625, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 191642564, + "step": 1976 + }, + { + "epoch": 0.19329292139225654, + "grad_norm": 6.4123011170338495, + "learning_rate": 5e-05, + "loss": 0.1102, + "num_input_tokens_seen": 191739644, + "step": 1977 + }, + { + "epoch": 0.19329292139225654, + "loss": 0.10529784113168716, + "loss_ce": 0.011135850101709366, + "loss_iou": 0.369140625, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 191739644, + "step": 1977 + }, + { + "epoch": 0.19339069221744232, + "grad_norm": 12.782942936313722, + "learning_rate": 5e-05, + "loss": 0.1261, + "num_input_tokens_seen": 191836768, + "step": 1978 + }, + { + "epoch": 0.19339069221744232, + "loss": 0.09355151653289795, + "loss_ce": 0.008808017708361149, + "loss_iou": 0.296875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 191836768, + "step": 1978 + }, + { + "epoch": 0.19348846304262807, + "grad_norm": 13.747806121053737, + "learning_rate": 5e-05, + "loss": 0.1114, + "num_input_tokens_seen": 191932548, + "step": 1979 + }, + { + "epoch": 0.19348846304262807, + "loss": 0.10623976588249207, + "loss_ce": 0.006157374940812588, + "loss_iou": 0.2578125, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 191932548, + "step": 1979 + }, + { + "epoch": 0.19358623386781385, + "grad_norm": 6.02105871648103, + "learning_rate": 5e-05, + "loss": 0.1036, + "num_input_tokens_seen": 192030208, + "step": 1980 + }, + { + "epoch": 0.19358623386781385, + "loss": 0.14339129626750946, + "loss_ce": 0.004872012417763472, + "loss_iou": 0.3203125, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 192030208, + "step": 1980 + }, + { + "epoch": 0.1936840046929996, + "grad_norm": 4.260672450262287, + "learning_rate": 5e-05, + "loss": 0.1081, + "num_input_tokens_seen": 192127744, + "step": 1981 + }, + { + "epoch": 0.1936840046929996, + "loss": 0.1376977115869522, + "loss_ce": 0.00921870768070221, + "loss_iou": 0.33984375, + "loss_num": 0.025634765625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 192127744, + "step": 1981 + }, + { + "epoch": 0.19378177551818537, + "grad_norm": 3.449189032646262, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 192224684, + "step": 1982 + }, + { + "epoch": 0.19378177551818537, + "loss": 0.07772643864154816, + "loss_ce": 0.003904420882463455, + "loss_iou": 0.435546875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 192224684, + "step": 1982 + }, + { + "epoch": 0.19387954634337115, + "grad_norm": 6.939331078769035, + "learning_rate": 5e-05, + "loss": 0.1084, + "num_input_tokens_seen": 192321208, + "step": 1983 + }, + { + "epoch": 0.19387954634337115, + "loss": 0.10170666873455048, + "loss_ce": 0.005667855031788349, + "loss_iou": 0.40625, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 192321208, + "step": 1983 + }, + { + "epoch": 0.1939773171685569, + "grad_norm": 5.145839049592859, + "learning_rate": 5e-05, + "loss": 0.1241, + "num_input_tokens_seen": 192419212, + "step": 1984 + }, + { + "epoch": 0.1939773171685569, + "loss": 0.144089013338089, + "loss_ce": 0.005569716915488243, + "loss_iou": 0.375, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 192419212, + "step": 1984 + }, + { + "epoch": 0.19407508799374268, + "grad_norm": 8.841615274924909, + "learning_rate": 5e-05, + "loss": 0.1086, + "num_input_tokens_seen": 192516920, + "step": 1985 + }, + { + "epoch": 0.19407508799374268, + "loss": 0.10745919495820999, + "loss_ce": 0.003943566232919693, + "loss_iou": 0.3671875, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 192516920, + "step": 1985 + }, + { + "epoch": 0.19417285881892843, + "grad_norm": 20.976493591004775, + "learning_rate": 5e-05, + "loss": 0.1486, + "num_input_tokens_seen": 192613704, + "step": 1986 + }, + { + "epoch": 0.19417285881892843, + "loss": 0.13214564323425293, + "loss_ce": 0.008671524003148079, + "loss_iou": 0.369140625, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 192613704, + "step": 1986 + }, + { + "epoch": 0.1942706296441142, + "grad_norm": 13.760147502353572, + "learning_rate": 5e-05, + "loss": 0.1081, + "num_input_tokens_seen": 192711560, + "step": 1987 + }, + { + "epoch": 0.1942706296441142, + "loss": 0.10381489992141724, + "loss_ce": 0.004495445638895035, + "loss_iou": 0.4921875, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 192711560, + "step": 1987 + }, + { + "epoch": 0.19436840046929996, + "grad_norm": 5.638648040852915, + "learning_rate": 5e-05, + "loss": 0.133, + "num_input_tokens_seen": 192809624, + "step": 1988 + }, + { + "epoch": 0.19436840046929996, + "loss": 0.13007210195064545, + "loss_ce": 0.003851403947919607, + "loss_iou": 0.400390625, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 192809624, + "step": 1988 + }, + { + "epoch": 0.19446617129448573, + "grad_norm": 4.668419163088091, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 192907364, + "step": 1989 + }, + { + "epoch": 0.19446617129448573, + "loss": 0.10556614398956299, + "loss_ce": 0.0039883810095489025, + "loss_iou": 0.388671875, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 192907364, + "step": 1989 + }, + { + "epoch": 0.19456394211967148, + "grad_norm": 4.341232039511796, + "learning_rate": 5e-05, + "loss": 0.0916, + "num_input_tokens_seen": 193004312, + "step": 1990 + }, + { + "epoch": 0.19456394211967148, + "loss": 0.09706860780715942, + "loss_ce": 0.004905517213046551, + "loss_iou": 0.3125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 193004312, + "step": 1990 + }, + { + "epoch": 0.19466171294485726, + "grad_norm": 7.5320059300298094, + "learning_rate": 5e-05, + "loss": 0.1104, + "num_input_tokens_seen": 193101092, + "step": 1991 + }, + { + "epoch": 0.19466171294485726, + "loss": 0.11930900812149048, + "loss_ce": 0.005783615633845329, + "loss_iou": 0.44140625, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 193101092, + "step": 1991 + }, + { + "epoch": 0.194759483770043, + "grad_norm": 6.255402897119782, + "learning_rate": 5e-05, + "loss": 0.1266, + "num_input_tokens_seen": 193197892, + "step": 1992 + }, + { + "epoch": 0.194759483770043, + "loss": 0.08071516454219818, + "loss_ce": 0.004985789302736521, + "loss_iou": 0.31640625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 193197892, + "step": 1992 + }, + { + "epoch": 0.1948572545952288, + "grad_norm": 4.998207444157522, + "learning_rate": 5e-05, + "loss": 0.1065, + "num_input_tokens_seen": 193295064, + "step": 1993 + }, + { + "epoch": 0.1948572545952288, + "loss": 0.10409414768218994, + "loss_ce": 0.005362158641219139, + "loss_iou": 0.30078125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 193295064, + "step": 1993 + }, + { + "epoch": 0.19495502542041454, + "grad_norm": 24.12296983717802, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 193391720, + "step": 1994 + }, + { + "epoch": 0.19495502542041454, + "loss": 0.05797424167394638, + "loss_ce": 0.0017761207418516278, + "loss_iou": 0.318359375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 193391720, + "step": 1994 + }, + { + "epoch": 0.19505279624560032, + "grad_norm": 10.271501226551587, + "learning_rate": 5e-05, + "loss": 0.1372, + "num_input_tokens_seen": 193488572, + "step": 1995 + }, + { + "epoch": 0.19505279624560032, + "loss": 0.08235445618629456, + "loss_ce": 0.005175497382879257, + "loss_iou": 0.396484375, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 193488572, + "step": 1995 + }, + { + "epoch": 0.19515056707078607, + "grad_norm": 8.419117185091517, + "learning_rate": 5e-05, + "loss": 0.1145, + "num_input_tokens_seen": 193584800, + "step": 1996 + }, + { + "epoch": 0.19515056707078607, + "loss": 0.07831750810146332, + "loss_ce": 0.00493799289688468, + "loss_iou": 0.33203125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 193584800, + "step": 1996 + }, + { + "epoch": 0.19524833789597185, + "grad_norm": 14.244239507010475, + "learning_rate": 5e-05, + "loss": 0.1123, + "num_input_tokens_seen": 193680916, + "step": 1997 + }, + { + "epoch": 0.19524833789597185, + "loss": 0.15048298239707947, + "loss_ce": 0.00517352856695652, + "loss_iou": 0.333984375, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 193680916, + "step": 1997 + }, + { + "epoch": 0.1953461087211576, + "grad_norm": 28.88568620754747, + "learning_rate": 5e-05, + "loss": 0.117, + "num_input_tokens_seen": 193777216, + "step": 1998 + }, + { + "epoch": 0.1953461087211576, + "loss": 0.08862312138080597, + "loss_ce": 0.007721017114818096, + "loss_iou": 0.255859375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 193777216, + "step": 1998 + }, + { + "epoch": 0.19544387954634337, + "grad_norm": 32.96045262809219, + "learning_rate": 5e-05, + "loss": 0.1024, + "num_input_tokens_seen": 193874524, + "step": 1999 + }, + { + "epoch": 0.19544387954634337, + "loss": 0.07376083731651306, + "loss_ce": 0.005676116794347763, + "loss_iou": 0.5078125, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 193874524, + "step": 1999 + }, + { + "epoch": 0.19554165037152912, + "grad_norm": 15.416359091305731, + "learning_rate": 5e-05, + "loss": 0.1044, + "num_input_tokens_seen": 193971360, + "step": 2000 + }, + { + "epoch": 0.19554165037152912, + "eval_seeclick_CIoU": 0.3186468482017517, + "eval_seeclick_GIoU": 0.3112681359052658, + "eval_seeclick_IoU": 0.40107667446136475, + "eval_seeclick_MAE_all": 0.10669292882084846, + "eval_seeclick_MAE_h": 0.031174536794424057, + "eval_seeclick_MAE_w": 0.1684400737285614, + "eval_seeclick_MAE_x": 0.19487061351537704, + "eval_seeclick_MAE_y": 0.03228648845106363, + "eval_seeclick_NUM_probability": 0.9999838769435883, + "eval_seeclick_inside_bbox": 0.6306818127632141, + "eval_seeclick_loss": 0.3652477264404297, + "eval_seeclick_loss_ce": 0.009715383872389793, + "eval_seeclick_loss_iou": 0.49176025390625, + "eval_seeclick_loss_num": 0.06982421875, + "eval_seeclick_loss_xval": 0.34893798828125, + "eval_seeclick_runtime": 74.7451, + "eval_seeclick_samples_per_second": 0.575, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 193971360, + "step": 2000 + }, + { + "epoch": 0.19554165037152912, + "eval_icons_CIoU": 0.5279911160469055, + "eval_icons_GIoU": 0.5231417268514633, + "eval_icons_IoU": 0.5792263448238373, + "eval_icons_MAE_all": 0.08887455239892006, + "eval_icons_MAE_h": 0.12551812827587128, + "eval_icons_MAE_w": 0.05893457867205143, + "eval_icons_MAE_x": 0.052066270262002945, + "eval_icons_MAE_y": 0.11897922307252884, + "eval_icons_NUM_probability": 0.9998487830162048, + "eval_icons_inside_bbox": 0.7829861044883728, + "eval_icons_loss": 0.2604774832725525, + "eval_icons_loss_ce": 2.6995211555913556e-05, + "eval_icons_loss_iou": 0.41558837890625, + "eval_icons_loss_num": 0.053508758544921875, + "eval_icons_loss_xval": 0.267364501953125, + "eval_icons_runtime": 90.6816, + "eval_icons_samples_per_second": 0.551, + "eval_icons_steps_per_second": 0.022, + "num_input_tokens_seen": 193971360, + "step": 2000 + }, + { + "epoch": 0.19554165037152912, + "eval_screenspot_CIoU": 0.20616824428240457, + "eval_screenspot_GIoU": 0.18411041299502054, + "eval_screenspot_IoU": 0.31420164306958515, + "eval_screenspot_MAE_all": 0.18949437141418457, + "eval_screenspot_MAE_h": 0.1520841916402181, + "eval_screenspot_MAE_w": 0.25509749352931976, + "eval_screenspot_MAE_x": 0.2142815887928009, + "eval_screenspot_MAE_y": 0.1365142116943995, + "eval_screenspot_NUM_probability": 0.999772330125173, + "eval_screenspot_inside_bbox": 0.5795833269755045, + "eval_screenspot_loss": 0.6370695233345032, + "eval_screenspot_loss_ce": 0.013599398545920849, + "eval_screenspot_loss_iou": 0.3542073567708333, + "eval_screenspot_loss_num": 0.12677001953125, + "eval_screenspot_loss_xval": 0.6341959635416666, + "eval_screenspot_runtime": 145.7336, + "eval_screenspot_samples_per_second": 0.611, + "eval_screenspot_steps_per_second": 0.021, + "num_input_tokens_seen": 193971360, + "step": 2000 + }, + { + "epoch": 0.19554165037152912, + "eval_compot_CIoU": 0.34028612077236176, + "eval_compot_GIoU": 0.32377317547798157, + "eval_compot_IoU": 0.4252178221940994, + "eval_compot_MAE_all": 0.11001307517290115, + "eval_compot_MAE_h": 0.09504004195332527, + "eval_compot_MAE_w": 0.12526291608810425, + "eval_compot_MAE_x": 0.11929748952388763, + "eval_compot_MAE_y": 0.10045182704925537, + "eval_compot_NUM_probability": 0.9992649853229523, + "eval_compot_inside_bbox": 0.578125, + "eval_compot_loss": 0.32822906970977783, + "eval_compot_loss_ce": 0.012714313808828592, + "eval_compot_loss_iou": 0.344970703125, + "eval_compot_loss_num": 0.05725860595703125, + "eval_compot_loss_xval": 0.286163330078125, + "eval_compot_runtime": 95.7534, + "eval_compot_samples_per_second": 0.522, + "eval_compot_steps_per_second": 0.021, + "num_input_tokens_seen": 193971360, + "step": 2000 + }, + { + "epoch": 0.19554165037152912, + "loss": 0.322577565908432, + "loss_ce": 0.01550968550145626, + "loss_iou": 0.33984375, + "loss_num": 0.0615234375, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 193971360, + "step": 2000 + }, + { + "epoch": 0.1956394211967149, + "grad_norm": 17.53927908971455, + "learning_rate": 5e-05, + "loss": 0.152, + "num_input_tokens_seen": 194068728, + "step": 2001 + }, + { + "epoch": 0.1956394211967149, + "loss": 0.13857290148735046, + "loss_ce": 0.0071031758561730385, + "loss_iou": 0.39453125, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 194068728, + "step": 2001 + }, + { + "epoch": 0.19573719202190065, + "grad_norm": 5.793368439199652, + "learning_rate": 5e-05, + "loss": 0.1094, + "num_input_tokens_seen": 194165820, + "step": 2002 + }, + { + "epoch": 0.19573719202190065, + "loss": 0.09011324495077133, + "loss_ce": 0.0032296977005898952, + "loss_iou": 0.3828125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 194165820, + "step": 2002 + }, + { + "epoch": 0.19583496284708643, + "grad_norm": 6.339285000984853, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 194263100, + "step": 2003 + }, + { + "epoch": 0.19583496284708643, + "loss": 0.056934986263513565, + "loss_ce": 0.004734671209007502, + "loss_iou": 0.408203125, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 194263100, + "step": 2003 + }, + { + "epoch": 0.19593273367227218, + "grad_norm": 6.61410462750495, + "learning_rate": 5e-05, + "loss": 0.0997, + "num_input_tokens_seen": 194360320, + "step": 2004 + }, + { + "epoch": 0.19593273367227218, + "loss": 0.07663760334253311, + "loss_ce": 0.004402492195367813, + "loss_iou": 0.384765625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 194360320, + "step": 2004 + }, + { + "epoch": 0.19603050449745796, + "grad_norm": 5.994514489230971, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 194457172, + "step": 2005 + }, + { + "epoch": 0.19603050449745796, + "loss": 0.07880698889493942, + "loss_ce": 0.007006756961345673, + "loss_iou": 0.34765625, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 194457172, + "step": 2005 + }, + { + "epoch": 0.19612827532264374, + "grad_norm": 6.144535942278827, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 194554444, + "step": 2006 + }, + { + "epoch": 0.19612827532264374, + "loss": 0.09815546870231628, + "loss_ce": 0.004252884536981583, + "loss_iou": 0.47265625, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 194554444, + "step": 2006 + }, + { + "epoch": 0.19622604614782949, + "grad_norm": 4.252356480111184, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 194651168, + "step": 2007 + }, + { + "epoch": 0.19622604614782949, + "loss": 0.07190442085266113, + "loss_ce": 0.006078007165342569, + "loss_iou": 0.35546875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 194651168, + "step": 2007 + }, + { + "epoch": 0.19632381697301526, + "grad_norm": 2.6654597476935638, + "learning_rate": 5e-05, + "loss": 0.0952, + "num_input_tokens_seen": 194747600, + "step": 2008 + }, + { + "epoch": 0.19632381697301526, + "loss": 0.0993829295039177, + "loss_ce": 0.010912466794252396, + "loss_iou": 0.326171875, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 194747600, + "step": 2008 + }, + { + "epoch": 0.19642158779820101, + "grad_norm": 8.430215360209598, + "learning_rate": 5e-05, + "loss": 0.0949, + "num_input_tokens_seen": 194844936, + "step": 2009 + }, + { + "epoch": 0.19642158779820101, + "loss": 0.06486693024635315, + "loss_ce": 0.008729840628802776, + "loss_iou": 0.38671875, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 194844936, + "step": 2009 + }, + { + "epoch": 0.1965193586233868, + "grad_norm": 4.917829540661737, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 194942088, + "step": 2010 + }, + { + "epoch": 0.1965193586233868, + "loss": 0.058256909251213074, + "loss_ce": 0.004759598523378372, + "loss_iou": 0.2890625, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 194942088, + "step": 2010 + }, + { + "epoch": 0.19661712944857254, + "grad_norm": 3.2368983743273367, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 195038952, + "step": 2011 + }, + { + "epoch": 0.19661712944857254, + "loss": 0.06342949718236923, + "loss_ce": 0.006857538130134344, + "loss_iou": 0.32421875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 195038952, + "step": 2011 + }, + { + "epoch": 0.19671490027375832, + "grad_norm": 12.962881548446566, + "learning_rate": 5e-05, + "loss": 0.117, + "num_input_tokens_seen": 195135748, + "step": 2012 + }, + { + "epoch": 0.19671490027375832, + "loss": 0.11827006936073303, + "loss_ce": 0.004439500626176596, + "loss_iou": 0.28125, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 195135748, + "step": 2012 + }, + { + "epoch": 0.19681267109894407, + "grad_norm": 6.419156724103979, + "learning_rate": 5e-05, + "loss": 0.1093, + "num_input_tokens_seen": 195232544, + "step": 2013 + }, + { + "epoch": 0.19681267109894407, + "loss": 0.1480170488357544, + "loss_ce": 0.00696480693295598, + "loss_iou": 0.376953125, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 195232544, + "step": 2013 + }, + { + "epoch": 0.19691044192412985, + "grad_norm": 7.15231190345471, + "learning_rate": 5e-05, + "loss": 0.0977, + "num_input_tokens_seen": 195329956, + "step": 2014 + }, + { + "epoch": 0.19691044192412985, + "loss": 0.11344093829393387, + "loss_ce": 0.0025400586891919374, + "loss_iou": 0.318359375, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 195329956, + "step": 2014 + }, + { + "epoch": 0.1970082127493156, + "grad_norm": 7.481727049938296, + "learning_rate": 5e-05, + "loss": 0.0912, + "num_input_tokens_seen": 195426852, + "step": 2015 + }, + { + "epoch": 0.1970082127493156, + "loss": 0.07057404518127441, + "loss_ce": 0.0070211929269135, + "loss_iou": 0.33984375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 195426852, + "step": 2015 + }, + { + "epoch": 0.19710598357450138, + "grad_norm": 10.15522421130865, + "learning_rate": 5e-05, + "loss": 0.0991, + "num_input_tokens_seen": 195523660, + "step": 2016 + }, + { + "epoch": 0.19710598357450138, + "loss": 0.0898415595293045, + "loss_ce": 0.005399421323090792, + "loss_iou": 0.310546875, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 195523660, + "step": 2016 + }, + { + "epoch": 0.19720375439968713, + "grad_norm": 2.2688649263435896, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 195620932, + "step": 2017 + }, + { + "epoch": 0.19720375439968713, + "loss": 0.06372584402561188, + "loss_ce": 0.0049795061349868774, + "loss_iou": 0.361328125, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 195620932, + "step": 2017 + }, + { + "epoch": 0.1973015252248729, + "grad_norm": 10.990040286139811, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 195718268, + "step": 2018 + }, + { + "epoch": 0.1973015252248729, + "loss": 0.05493636429309845, + "loss_ce": 0.004506065510213375, + "loss_iou": 0.275390625, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 195718268, + "step": 2018 + }, + { + "epoch": 0.19739929605005865, + "grad_norm": 17.351944882030107, + "learning_rate": 5e-05, + "loss": 0.1172, + "num_input_tokens_seen": 195815936, + "step": 2019 + }, + { + "epoch": 0.19739929605005865, + "loss": 0.10583323240280151, + "loss_ce": 0.003217867575585842, + "loss_iou": 0.373046875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 195815936, + "step": 2019 + }, + { + "epoch": 0.19749706687524443, + "grad_norm": 14.168206507585408, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 195912180, + "step": 2020 + }, + { + "epoch": 0.19749706687524443, + "loss": 0.10409164428710938, + "loss_ce": 0.006710050627589226, + "loss_iou": 0.3203125, + "loss_num": 0.01953125, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 195912180, + "step": 2020 + }, + { + "epoch": 0.19759483770043018, + "grad_norm": 14.080095123103396, + "learning_rate": 5e-05, + "loss": 0.1007, + "num_input_tokens_seen": 196008988, + "step": 2021 + }, + { + "epoch": 0.19759483770043018, + "loss": 0.10199550539255142, + "loss_ce": 0.0044308071956038475, + "loss_iou": 0.474609375, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 196008988, + "step": 2021 + }, + { + "epoch": 0.19769260852561596, + "grad_norm": 31.95499667750961, + "learning_rate": 5e-05, + "loss": 0.1061, + "num_input_tokens_seen": 196105576, + "step": 2022 + }, + { + "epoch": 0.19769260852561596, + "loss": 0.09519803524017334, + "loss_ce": 0.004804966039955616, + "loss_iou": 0.470703125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 196105576, + "step": 2022 + }, + { + "epoch": 0.1977903793508017, + "grad_norm": 16.726188463568704, + "learning_rate": 5e-05, + "loss": 0.1554, + "num_input_tokens_seen": 196202220, + "step": 2023 + }, + { + "epoch": 0.1977903793508017, + "loss": 0.1380329728126526, + "loss_ce": 0.00320630194619298, + "loss_iou": 0.45703125, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 196202220, + "step": 2023 + }, + { + "epoch": 0.1978881501759875, + "grad_norm": 8.087642757999957, + "learning_rate": 5e-05, + "loss": 0.13, + "num_input_tokens_seen": 196298028, + "step": 2024 + }, + { + "epoch": 0.1978881501759875, + "loss": 0.1351953148841858, + "loss_ce": 0.006929934024810791, + "loss_iou": 0.3046875, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 196298028, + "step": 2024 + }, + { + "epoch": 0.19798592100117324, + "grad_norm": 14.754965866412453, + "learning_rate": 5e-05, + "loss": 0.0851, + "num_input_tokens_seen": 196395364, + "step": 2025 + }, + { + "epoch": 0.19798592100117324, + "loss": 0.0885806530714035, + "loss_ce": 0.008472004905343056, + "loss_iou": 0.2890625, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 196395364, + "step": 2025 + }, + { + "epoch": 0.19808369182635902, + "grad_norm": 31.048797797763076, + "learning_rate": 5e-05, + "loss": 0.1354, + "num_input_tokens_seen": 196492424, + "step": 2026 + }, + { + "epoch": 0.19808369182635902, + "loss": 0.15171056985855103, + "loss_ce": 0.006130272522568703, + "loss_iou": 0.29296875, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 196492424, + "step": 2026 + }, + { + "epoch": 0.19818146265154477, + "grad_norm": 6.196167180523498, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 196589304, + "step": 2027 + }, + { + "epoch": 0.19818146265154477, + "loss": 0.12133625149726868, + "loss_ce": 0.005918774753808975, + "loss_iou": 0.2373046875, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 196589304, + "step": 2027 + }, + { + "epoch": 0.19827923347673054, + "grad_norm": 12.182716935205011, + "learning_rate": 5e-05, + "loss": 0.1072, + "num_input_tokens_seen": 196686508, + "step": 2028 + }, + { + "epoch": 0.19827923347673054, + "loss": 0.09770678728818893, + "loss_ce": 0.0050554173067212105, + "loss_iou": 0.431640625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 196686508, + "step": 2028 + }, + { + "epoch": 0.19837700430191632, + "grad_norm": 6.148717000773027, + "learning_rate": 5e-05, + "loss": 0.117, + "num_input_tokens_seen": 196783248, + "step": 2029 + }, + { + "epoch": 0.19837700430191632, + "loss": 0.1368054300546646, + "loss_ce": 0.011088269762694836, + "loss_iou": 0.341796875, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 196783248, + "step": 2029 + }, + { + "epoch": 0.19847477512710207, + "grad_norm": 7.437606616599103, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 196879916, + "step": 2030 + }, + { + "epoch": 0.19847477512710207, + "loss": 0.09230165183544159, + "loss_ce": 0.004136369563639164, + "loss_iou": 0.34375, + "loss_num": 0.017578125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 196879916, + "step": 2030 + }, + { + "epoch": 0.19857254595228785, + "grad_norm": 9.312348903878487, + "learning_rate": 5e-05, + "loss": 0.1031, + "num_input_tokens_seen": 196977696, + "step": 2031 + }, + { + "epoch": 0.19857254595228785, + "loss": 0.12348054349422455, + "loss_ce": 0.004309400450438261, + "loss_iou": 0.361328125, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 196977696, + "step": 2031 + }, + { + "epoch": 0.1986703167774736, + "grad_norm": 16.282551447895038, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 197074504, + "step": 2032 + }, + { + "epoch": 0.1986703167774736, + "loss": 0.10152485966682434, + "loss_ce": 0.0023885027039796114, + "loss_iou": 0.38671875, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 197074504, + "step": 2032 + }, + { + "epoch": 0.19876808760265938, + "grad_norm": 20.351878965607817, + "learning_rate": 5e-05, + "loss": 0.1218, + "num_input_tokens_seen": 197171844, + "step": 2033 + }, + { + "epoch": 0.19876808760265938, + "loss": 0.1386350691318512, + "loss_ce": 0.007928289473056793, + "loss_iou": 0.359375, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 197171844, + "step": 2033 + }, + { + "epoch": 0.19886585842784513, + "grad_norm": 16.388843961155388, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 197269076, + "step": 2034 + }, + { + "epoch": 0.19886585842784513, + "loss": 0.0659937709569931, + "loss_ce": 0.0031580787617713213, + "loss_iou": 0.29296875, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 197269076, + "step": 2034 + }, + { + "epoch": 0.1989636292530309, + "grad_norm": 7.162324981257405, + "learning_rate": 5e-05, + "loss": 0.1088, + "num_input_tokens_seen": 197366524, + "step": 2035 + }, + { + "epoch": 0.1989636292530309, + "loss": 0.06827984750270844, + "loss_ce": 0.005871396511793137, + "loss_iou": 0.42578125, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 197366524, + "step": 2035 + }, + { + "epoch": 0.19906140007821665, + "grad_norm": 8.031019712327474, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 197464384, + "step": 2036 + }, + { + "epoch": 0.19906140007821665, + "loss": 0.10386519134044647, + "loss_ce": 0.0025163115933537483, + "loss_iou": 0.38671875, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 197464384, + "step": 2036 + }, + { + "epoch": 0.19915917090340243, + "grad_norm": 19.892591078410188, + "learning_rate": 5e-05, + "loss": 0.1078, + "num_input_tokens_seen": 197561412, + "step": 2037 + }, + { + "epoch": 0.19915917090340243, + "loss": 0.0947653278708458, + "loss_ce": 0.0034948866814374924, + "loss_iou": 0.396484375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 197561412, + "step": 2037 + }, + { + "epoch": 0.19925694172858818, + "grad_norm": 14.479835789273833, + "learning_rate": 5e-05, + "loss": 0.1105, + "num_input_tokens_seen": 197658568, + "step": 2038 + }, + { + "epoch": 0.19925694172858818, + "loss": 0.09866975247859955, + "loss_ce": 0.0032718093134462833, + "loss_iou": 0.349609375, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 197658568, + "step": 2038 + }, + { + "epoch": 0.19935471255377396, + "grad_norm": 16.206156125155374, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 197756000, + "step": 2039 + }, + { + "epoch": 0.19935471255377396, + "loss": 0.07708339393138885, + "loss_ce": 0.005275537725538015, + "loss_iou": 0.318359375, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 197756000, + "step": 2039 + }, + { + "epoch": 0.1994524833789597, + "grad_norm": 15.692043403253072, + "learning_rate": 5e-05, + "loss": 0.114, + "num_input_tokens_seen": 197854412, + "step": 2040 + }, + { + "epoch": 0.1994524833789597, + "loss": 0.12583956122398376, + "loss_ce": 0.006515816319733858, + "loss_iou": 0.32421875, + "loss_num": 0.02392578125, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 197854412, + "step": 2040 + }, + { + "epoch": 0.1995502542041455, + "grad_norm": 15.525580376736283, + "learning_rate": 5e-05, + "loss": 0.1081, + "num_input_tokens_seen": 197951016, + "step": 2041 + }, + { + "epoch": 0.1995502542041455, + "loss": 0.12015342712402344, + "loss_ce": 0.0036373138427734375, + "loss_iou": 0.3046875, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 197951016, + "step": 2041 + }, + { + "epoch": 0.19964802502933124, + "grad_norm": 15.349064296916367, + "learning_rate": 5e-05, + "loss": 0.1143, + "num_input_tokens_seen": 198047116, + "step": 2042 + }, + { + "epoch": 0.19964802502933124, + "loss": 0.09535664319992065, + "loss_ce": 0.0051161679439246655, + "loss_iou": 0.3203125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 198047116, + "step": 2042 + }, + { + "epoch": 0.19974579585451702, + "grad_norm": 15.210867583730984, + "learning_rate": 5e-05, + "loss": 0.1721, + "num_input_tokens_seen": 198144496, + "step": 2043 + }, + { + "epoch": 0.19974579585451702, + "loss": 0.1759471744298935, + "loss_ce": 0.006391511298716068, + "loss_iou": 0.400390625, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 198144496, + "step": 2043 + }, + { + "epoch": 0.19984356667970277, + "grad_norm": 10.136514562393225, + "learning_rate": 5e-05, + "loss": 0.1295, + "num_input_tokens_seen": 198241856, + "step": 2044 + }, + { + "epoch": 0.19984356667970277, + "loss": 0.12495223432779312, + "loss_ce": 0.007886797189712524, + "loss_iou": 0.345703125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 198241856, + "step": 2044 + }, + { + "epoch": 0.19994133750488854, + "grad_norm": 17.129234892048075, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 198338140, + "step": 2045 + }, + { + "epoch": 0.19994133750488854, + "loss": 0.08402656018733978, + "loss_ce": 0.003521181643009186, + "loss_iou": 0.38671875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 198338140, + "step": 2045 + }, + { + "epoch": 0.2000391083300743, + "grad_norm": 9.452216970773566, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 198434400, + "step": 2046 + }, + { + "epoch": 0.2000391083300743, + "loss": 0.047969359904527664, + "loss_ce": 0.003932494204491377, + "loss_iou": 0.412109375, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 198434400, + "step": 2046 + }, + { + "epoch": 0.20013687915526007, + "grad_norm": 7.541876023439198, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 198531528, + "step": 2047 + }, + { + "epoch": 0.20013687915526007, + "loss": 0.09603964537382126, + "loss_ce": 0.005860203877091408, + "loss_iou": 0.416015625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 198531528, + "step": 2047 + }, + { + "epoch": 0.20023464998044582, + "grad_norm": 16.617949015313783, + "learning_rate": 5e-05, + "loss": 0.1216, + "num_input_tokens_seen": 198628528, + "step": 2048 + }, + { + "epoch": 0.20023464998044582, + "loss": 0.09911307692527771, + "loss_ce": 0.006003950722515583, + "loss_iou": 0.322265625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 198628528, + "step": 2048 + }, + { + "epoch": 0.2003324208056316, + "grad_norm": 9.824731327497869, + "learning_rate": 5e-05, + "loss": 0.1138, + "num_input_tokens_seen": 198725084, + "step": 2049 + }, + { + "epoch": 0.2003324208056316, + "loss": 0.09302429854869843, + "loss_ce": 0.003645945806056261, + "loss_iou": 0.28515625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 198725084, + "step": 2049 + }, + { + "epoch": 0.20043019163081735, + "grad_norm": 31.92462378596863, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 198821256, + "step": 2050 + }, + { + "epoch": 0.20043019163081735, + "loss": 0.07123291492462158, + "loss_ce": 0.004185798112303019, + "loss_iou": 0.302734375, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 198821256, + "step": 2050 + }, + { + "epoch": 0.20052796245600313, + "grad_norm": 14.117873866875632, + "learning_rate": 5e-05, + "loss": 0.0827, + "num_input_tokens_seen": 198917756, + "step": 2051 + }, + { + "epoch": 0.20052796245600313, + "loss": 0.07604517042636871, + "loss_ce": 0.004710334353148937, + "loss_iou": 0.294921875, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 198917756, + "step": 2051 + }, + { + "epoch": 0.2006257332811889, + "grad_norm": 10.215857658709638, + "learning_rate": 5e-05, + "loss": 0.0983, + "num_input_tokens_seen": 199014980, + "step": 2052 + }, + { + "epoch": 0.2006257332811889, + "loss": 0.09988579899072647, + "loss_ce": 0.0066393413580954075, + "loss_iou": 0.3515625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 199014980, + "step": 2052 + }, + { + "epoch": 0.20072350410637466, + "grad_norm": 34.97325916199007, + "learning_rate": 5e-05, + "loss": 0.0988, + "num_input_tokens_seen": 199112200, + "step": 2053 + }, + { + "epoch": 0.20072350410637466, + "loss": 0.11525993049144745, + "loss_ce": 0.0025890374090522528, + "loss_iou": 0.34765625, + "loss_num": 0.0224609375, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 199112200, + "step": 2053 + }, + { + "epoch": 0.20082127493156043, + "grad_norm": 12.909717335111432, + "learning_rate": 5e-05, + "loss": 0.1026, + "num_input_tokens_seen": 199208716, + "step": 2054 + }, + { + "epoch": 0.20082127493156043, + "loss": 0.09090542048215866, + "loss_ce": 0.006524315103888512, + "loss_iou": 0.267578125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 199208716, + "step": 2054 + }, + { + "epoch": 0.20091904575674618, + "grad_norm": 11.890940338798496, + "learning_rate": 5e-05, + "loss": 0.0993, + "num_input_tokens_seen": 199306196, + "step": 2055 + }, + { + "epoch": 0.20091904575674618, + "loss": 0.07483803480863571, + "loss_ce": 0.003381126094609499, + "loss_iou": 0.32421875, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 199306196, + "step": 2055 + }, + { + "epoch": 0.20101681658193196, + "grad_norm": 10.19373589079763, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 199402688, + "step": 2056 + }, + { + "epoch": 0.20101681658193196, + "loss": 0.07755698263645172, + "loss_ce": 0.004986185114830732, + "loss_iou": 0.421875, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 199402688, + "step": 2056 + }, + { + "epoch": 0.2011145874071177, + "grad_norm": 18.714418364198053, + "learning_rate": 5e-05, + "loss": 0.1061, + "num_input_tokens_seen": 199499596, + "step": 2057 + }, + { + "epoch": 0.2011145874071177, + "loss": 0.10919708013534546, + "loss_ce": 0.0044149779714643955, + "loss_iou": 0.462890625, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 199499596, + "step": 2057 + }, + { + "epoch": 0.2012123582323035, + "grad_norm": 53.50630592355089, + "learning_rate": 5e-05, + "loss": 0.1109, + "num_input_tokens_seen": 199596956, + "step": 2058 + }, + { + "epoch": 0.2012123582323035, + "loss": 0.15465721487998962, + "loss_ce": 0.007623520214110613, + "loss_iou": 0.455078125, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 199596956, + "step": 2058 + }, + { + "epoch": 0.20131012905748924, + "grad_norm": 24.827806657553683, + "learning_rate": 5e-05, + "loss": 0.1337, + "num_input_tokens_seen": 199694156, + "step": 2059 + }, + { + "epoch": 0.20131012905748924, + "loss": 0.09173491597175598, + "loss_ce": 0.004439391661435366, + "loss_iou": 0.40625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 199694156, + "step": 2059 + }, + { + "epoch": 0.20140789988267502, + "grad_norm": 9.798227557207019, + "learning_rate": 5e-05, + "loss": 0.109, + "num_input_tokens_seen": 199790152, + "step": 2060 + }, + { + "epoch": 0.20140789988267502, + "loss": 0.09721718728542328, + "loss_ce": 0.006167992018163204, + "loss_iou": 0.35546875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 199790152, + "step": 2060 + }, + { + "epoch": 0.20150567070786077, + "grad_norm": 3.6875869033375244, + "learning_rate": 5e-05, + "loss": 0.1252, + "num_input_tokens_seen": 199887820, + "step": 2061 + }, + { + "epoch": 0.20150567070786077, + "loss": 0.12214212119579315, + "loss_ce": 0.006694122217595577, + "loss_iou": 0.32421875, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 199887820, + "step": 2061 + }, + { + "epoch": 0.20160344153304655, + "grad_norm": 13.527560578635212, + "learning_rate": 5e-05, + "loss": 0.1018, + "num_input_tokens_seen": 199985336, + "step": 2062 + }, + { + "epoch": 0.20160344153304655, + "loss": 0.0891474261879921, + "loss_ce": 0.005468228831887245, + "loss_iou": 0.40234375, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 199985336, + "step": 2062 + }, + { + "epoch": 0.2017012123582323, + "grad_norm": 5.925167120187272, + "learning_rate": 5e-05, + "loss": 0.1279, + "num_input_tokens_seen": 200081948, + "step": 2063 + }, + { + "epoch": 0.2017012123582323, + "loss": 0.1355876922607422, + "loss_ce": 0.004911421798169613, + "loss_iou": 0.373046875, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 200081948, + "step": 2063 + }, + { + "epoch": 0.20179898318341807, + "grad_norm": 2.8730636758960673, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 200178044, + "step": 2064 + }, + { + "epoch": 0.20179898318341807, + "loss": 0.08078368008136749, + "loss_ce": 0.0019262558780610561, + "loss_iou": 0.298828125, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 200178044, + "step": 2064 + }, + { + "epoch": 0.20189675400860382, + "grad_norm": 22.261280397586436, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 200275192, + "step": 2065 + }, + { + "epoch": 0.20189675400860382, + "loss": 0.09074096381664276, + "loss_ce": 0.007824699394404888, + "loss_iou": 0.28125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 200275192, + "step": 2065 + }, + { + "epoch": 0.2019945248337896, + "grad_norm": 15.825346347147537, + "learning_rate": 5e-05, + "loss": 0.1003, + "num_input_tokens_seen": 200372232, + "step": 2066 + }, + { + "epoch": 0.2019945248337896, + "loss": 0.09782141447067261, + "loss_ce": 0.006566223688423634, + "loss_iou": 0.3203125, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 200372232, + "step": 2066 + }, + { + "epoch": 0.20209229565897535, + "grad_norm": 24.378451200052712, + "learning_rate": 5e-05, + "loss": 0.1076, + "num_input_tokens_seen": 200469092, + "step": 2067 + }, + { + "epoch": 0.20209229565897535, + "loss": 0.1291874647140503, + "loss_ce": 0.009512778371572495, + "loss_iou": 0.3515625, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 200469092, + "step": 2067 + }, + { + "epoch": 0.20219006648416113, + "grad_norm": 40.761106638439095, + "learning_rate": 5e-05, + "loss": 0.1247, + "num_input_tokens_seen": 200566624, + "step": 2068 + }, + { + "epoch": 0.20219006648416113, + "loss": 0.12677840888500214, + "loss_ce": 0.007607264909893274, + "loss_iou": 0.3359375, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 200566624, + "step": 2068 + }, + { + "epoch": 0.20228783730934688, + "grad_norm": 5.428282320843226, + "learning_rate": 5e-05, + "loss": 0.0846, + "num_input_tokens_seen": 200663904, + "step": 2069 + }, + { + "epoch": 0.20228783730934688, + "loss": 0.0956551730632782, + "loss_ce": 0.007104606367647648, + "loss_iou": 0.376953125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 200663904, + "step": 2069 + }, + { + "epoch": 0.20238560813453266, + "grad_norm": 5.138223490530401, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 200760516, + "step": 2070 + }, + { + "epoch": 0.20238560813453266, + "loss": 0.07211622595787048, + "loss_ce": 0.005381908733397722, + "loss_iou": 0.3203125, + "loss_num": 0.0133056640625, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 200760516, + "step": 2070 + }, + { + "epoch": 0.2024833789597184, + "grad_norm": 19.40102345217591, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 200857584, + "step": 2071 + }, + { + "epoch": 0.2024833789597184, + "loss": 0.0786285251379013, + "loss_ce": 0.005264272913336754, + "loss_iou": 0.328125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 200857584, + "step": 2071 + }, + { + "epoch": 0.20258114978490419, + "grad_norm": 10.34634185419867, + "learning_rate": 5e-05, + "loss": 0.0937, + "num_input_tokens_seen": 200953984, + "step": 2072 + }, + { + "epoch": 0.20258114978490419, + "loss": 0.1071341261267662, + "loss_ce": 0.005968352779746056, + "loss_iou": 0.318359375, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 200953984, + "step": 2072 + }, + { + "epoch": 0.20267892061008994, + "grad_norm": 12.387304832046926, + "learning_rate": 5e-05, + "loss": 0.1094, + "num_input_tokens_seen": 201051780, + "step": 2073 + }, + { + "epoch": 0.20267892061008994, + "loss": 0.08487202227115631, + "loss_ce": 0.0038783750496804714, + "loss_iou": 0.3984375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 201051780, + "step": 2073 + }, + { + "epoch": 0.2027766914352757, + "grad_norm": 12.544996214820294, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 201147584, + "step": 2074 + }, + { + "epoch": 0.2027766914352757, + "loss": 0.11044880747795105, + "loss_ce": 0.005613293498754501, + "loss_iou": 0.384765625, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 201147584, + "step": 2074 + }, + { + "epoch": 0.2028744622604615, + "grad_norm": 11.646860746327247, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 201245100, + "step": 2075 + }, + { + "epoch": 0.2028744622604615, + "loss": 0.09636799991130829, + "loss_ce": 0.005654496140778065, + "loss_iou": 0.3984375, + "loss_num": 0.01806640625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 201245100, + "step": 2075 + }, + { + "epoch": 0.20297223308564724, + "grad_norm": 12.08464524693234, + "learning_rate": 5e-05, + "loss": 0.1199, + "num_input_tokens_seen": 201341964, + "step": 2076 + }, + { + "epoch": 0.20297223308564724, + "loss": 0.09121115505695343, + "loss_ce": 0.0030611352995038033, + "loss_iou": 0.33203125, + "loss_num": 0.017578125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 201341964, + "step": 2076 + }, + { + "epoch": 0.20307000391083302, + "grad_norm": 9.573480775603366, + "learning_rate": 5e-05, + "loss": 0.1535, + "num_input_tokens_seen": 201439568, + "step": 2077 + }, + { + "epoch": 0.20307000391083302, + "loss": 0.15801399946212769, + "loss_ce": 0.011117629706859589, + "loss_iou": 0.392578125, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 201439568, + "step": 2077 + }, + { + "epoch": 0.20316777473601877, + "grad_norm": 10.469352403257533, + "learning_rate": 5e-05, + "loss": 0.1081, + "num_input_tokens_seen": 201536068, + "step": 2078 + }, + { + "epoch": 0.20316777473601877, + "loss": 0.1046164482831955, + "loss_ce": 0.009050657041370869, + "loss_iou": 0.357421875, + "loss_num": 0.01904296875, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 201536068, + "step": 2078 + }, + { + "epoch": 0.20326554556120455, + "grad_norm": 4.586936251185234, + "learning_rate": 5e-05, + "loss": 0.1179, + "num_input_tokens_seen": 201632376, + "step": 2079 + }, + { + "epoch": 0.20326554556120455, + "loss": 0.16235114634037018, + "loss_ce": 0.007321842946112156, + "loss_iou": 0.32421875, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 201632376, + "step": 2079 + }, + { + "epoch": 0.2033633163863903, + "grad_norm": 5.83218370781032, + "learning_rate": 5e-05, + "loss": 0.1253, + "num_input_tokens_seen": 201729632, + "step": 2080 + }, + { + "epoch": 0.2033633163863903, + "loss": 0.10794786363840103, + "loss_ce": 0.00473741302266717, + "loss_iou": 0.390625, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 201729632, + "step": 2080 + }, + { + "epoch": 0.20346108721157607, + "grad_norm": 5.60600556977451, + "learning_rate": 5e-05, + "loss": 0.096, + "num_input_tokens_seen": 201827288, + "step": 2081 + }, + { + "epoch": 0.20346108721157607, + "loss": 0.08149182796478271, + "loss_ce": 0.004190800245851278, + "loss_iou": 0.427734375, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 201827288, + "step": 2081 + }, + { + "epoch": 0.20355885803676182, + "grad_norm": 7.506820441933553, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 201924676, + "step": 2082 + }, + { + "epoch": 0.20355885803676182, + "loss": 0.06337877362966537, + "loss_ce": 0.004922356456518173, + "loss_iou": 0.357421875, + "loss_num": 0.01171875, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 201924676, + "step": 2082 + }, + { + "epoch": 0.2036566288619476, + "grad_norm": 4.046720710856237, + "learning_rate": 5e-05, + "loss": 0.0956, + "num_input_tokens_seen": 202021464, + "step": 2083 + }, + { + "epoch": 0.2036566288619476, + "loss": 0.09538625180721283, + "loss_ce": 0.009052025154232979, + "loss_iou": 0.390625, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 202021464, + "step": 2083 + }, + { + "epoch": 0.20375439968713335, + "grad_norm": 7.000089376075297, + "learning_rate": 5e-05, + "loss": 0.0861, + "num_input_tokens_seen": 202118400, + "step": 2084 + }, + { + "epoch": 0.20375439968713335, + "loss": 0.11056967079639435, + "loss_ce": 0.007542327977716923, + "loss_iou": 0.33984375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 202118400, + "step": 2084 + }, + { + "epoch": 0.20385217051231913, + "grad_norm": 6.7932664890827175, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 202215948, + "step": 2085 + }, + { + "epoch": 0.20385217051231913, + "loss": 0.0426190085709095, + "loss_ce": 0.0035107301082462072, + "loss_iou": 0.32421875, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 202215948, + "step": 2085 + }, + { + "epoch": 0.20394994133750488, + "grad_norm": 21.466218242424834, + "learning_rate": 5e-05, + "loss": 0.1272, + "num_input_tokens_seen": 202312792, + "step": 2086 + }, + { + "epoch": 0.20394994133750488, + "loss": 0.12653203308582306, + "loss_ce": 0.00823064986616373, + "loss_iou": 0.291015625, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 202312792, + "step": 2086 + }, + { + "epoch": 0.20404771216269066, + "grad_norm": 5.087427759405421, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 202409900, + "step": 2087 + }, + { + "epoch": 0.20404771216269066, + "loss": 0.08493032306432724, + "loss_ce": 0.00606909254565835, + "loss_iou": 0.322265625, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 202409900, + "step": 2087 + }, + { + "epoch": 0.2041454829878764, + "grad_norm": 3.59799878998667, + "learning_rate": 5e-05, + "loss": 0.1391, + "num_input_tokens_seen": 202506432, + "step": 2088 + }, + { + "epoch": 0.2041454829878764, + "loss": 0.1509949266910553, + "loss_ce": 0.010003703646361828, + "loss_iou": 0.310546875, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 202506432, + "step": 2088 + }, + { + "epoch": 0.2042432538130622, + "grad_norm": 8.063062407692696, + "learning_rate": 5e-05, + "loss": 0.1058, + "num_input_tokens_seen": 202603860, + "step": 2089 + }, + { + "epoch": 0.2042432538130622, + "loss": 0.1148819699883461, + "loss_ce": 0.008635020814836025, + "loss_iou": 0.328125, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 202603860, + "step": 2089 + }, + { + "epoch": 0.20434102463824794, + "grad_norm": 2.6639593847959815, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 202701104, + "step": 2090 + }, + { + "epoch": 0.20434102463824794, + "loss": 0.10449901223182678, + "loss_ce": 0.006980087608098984, + "loss_iou": 0.359375, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 202701104, + "step": 2090 + }, + { + "epoch": 0.20443879546343371, + "grad_norm": 6.266731741197929, + "learning_rate": 5e-05, + "loss": 0.0976, + "num_input_tokens_seen": 202798724, + "step": 2091 + }, + { + "epoch": 0.20443879546343371, + "loss": 0.09467622637748718, + "loss_ce": 0.0029556467197835445, + "loss_iou": 0.271484375, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 202798724, + "step": 2091 + }, + { + "epoch": 0.20453656628861946, + "grad_norm": 2.278354793628632, + "learning_rate": 5e-05, + "loss": 0.1082, + "num_input_tokens_seen": 202896684, + "step": 2092 + }, + { + "epoch": 0.20453656628861946, + "loss": 0.07256625592708588, + "loss_ce": 0.0019409526139497757, + "loss_iou": 0.28125, + "loss_num": 0.01409912109375, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 202896684, + "step": 2092 + }, + { + "epoch": 0.20463433711380524, + "grad_norm": 4.002837566434292, + "learning_rate": 5e-05, + "loss": 0.0498, + "num_input_tokens_seen": 202993708, + "step": 2093 + }, + { + "epoch": 0.20463433711380524, + "loss": 0.041415587067604065, + "loss_ce": 0.009089839644730091, + "loss_iou": 0.244140625, + "loss_num": 0.0064697265625, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 202993708, + "step": 2093 + }, + { + "epoch": 0.204732107938991, + "grad_norm": 5.670046276033949, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 203090852, + "step": 2094 + }, + { + "epoch": 0.204732107938991, + "loss": 0.07130013406276703, + "loss_ce": 0.0013996241614222527, + "loss_iou": 0.333984375, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 203090852, + "step": 2094 + }, + { + "epoch": 0.20482987876417677, + "grad_norm": 7.6989037834117715, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 203187268, + "step": 2095 + }, + { + "epoch": 0.20482987876417677, + "loss": 0.08734185993671417, + "loss_ce": 0.0039373161271214485, + "loss_iou": 0.31640625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 203187268, + "step": 2095 + }, + { + "epoch": 0.20492764958936252, + "grad_norm": 4.538108908055291, + "learning_rate": 5e-05, + "loss": 0.0932, + "num_input_tokens_seen": 203283328, + "step": 2096 + }, + { + "epoch": 0.20492764958936252, + "loss": 0.09987647831439972, + "loss_ce": 0.005447467789053917, + "loss_iou": 0.31640625, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 203283328, + "step": 2096 + }, + { + "epoch": 0.2050254204145483, + "grad_norm": 3.042326474465541, + "learning_rate": 5e-05, + "loss": 0.0918, + "num_input_tokens_seen": 203379692, + "step": 2097 + }, + { + "epoch": 0.2050254204145483, + "loss": 0.11751683801412582, + "loss_ce": 0.004090634174644947, + "loss_iou": 0.33203125, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 203379692, + "step": 2097 + }, + { + "epoch": 0.20512319123973408, + "grad_norm": 5.977957852897276, + "learning_rate": 5e-05, + "loss": 0.1149, + "num_input_tokens_seen": 203477720, + "step": 2098 + }, + { + "epoch": 0.20512319123973408, + "loss": 0.13860079646110535, + "loss_ce": 0.007314174436032772, + "loss_iou": 0.302734375, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 203477720, + "step": 2098 + }, + { + "epoch": 0.20522096206491983, + "grad_norm": 5.7780221466708355, + "learning_rate": 5e-05, + "loss": 0.0995, + "num_input_tokens_seen": 203574748, + "step": 2099 + }, + { + "epoch": 0.20522096206491983, + "loss": 0.11352820694446564, + "loss_ce": 0.006243654526770115, + "loss_iou": 0.34765625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 203574748, + "step": 2099 + }, + { + "epoch": 0.2053187328901056, + "grad_norm": 13.339980420325215, + "learning_rate": 5e-05, + "loss": 0.1085, + "num_input_tokens_seen": 203672080, + "step": 2100 + }, + { + "epoch": 0.2053187328901056, + "loss": 0.13267788290977478, + "loss_ce": 0.006487683393061161, + "loss_iou": 0.279296875, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 203672080, + "step": 2100 + }, + { + "epoch": 0.20541650371529135, + "grad_norm": 6.113454714116408, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 203768564, + "step": 2101 + }, + { + "epoch": 0.20541650371529135, + "loss": 0.09939678013324738, + "loss_ce": 0.009583543054759502, + "loss_iou": 0.34765625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 203768564, + "step": 2101 + }, + { + "epoch": 0.20551427454047713, + "grad_norm": 11.699203358890031, + "learning_rate": 5e-05, + "loss": 0.1017, + "num_input_tokens_seen": 203865440, + "step": 2102 + }, + { + "epoch": 0.20551427454047713, + "loss": 0.09788279235363007, + "loss_ce": 0.009137678891420364, + "loss_iou": 0.376953125, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 203865440, + "step": 2102 + }, + { + "epoch": 0.20561204536566288, + "grad_norm": 9.144524089541129, + "learning_rate": 5e-05, + "loss": 0.1168, + "num_input_tokens_seen": 203962004, + "step": 2103 + }, + { + "epoch": 0.20561204536566288, + "loss": 0.1169780045747757, + "loss_ce": 0.007206276059150696, + "loss_iou": 0.2734375, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 203962004, + "step": 2103 + }, + { + "epoch": 0.20570981619084866, + "grad_norm": 11.758118456873529, + "learning_rate": 5e-05, + "loss": 0.0938, + "num_input_tokens_seen": 204058828, + "step": 2104 + }, + { + "epoch": 0.20570981619084866, + "loss": 0.09863090515136719, + "loss_ce": 0.005918504670262337, + "loss_iou": 0.453125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 204058828, + "step": 2104 + }, + { + "epoch": 0.2058075870160344, + "grad_norm": 22.579202452028046, + "learning_rate": 5e-05, + "loss": 0.1176, + "num_input_tokens_seen": 204155532, + "step": 2105 + }, + { + "epoch": 0.2058075870160344, + "loss": 0.13094094395637512, + "loss_ce": 0.006261385977268219, + "loss_iou": 0.35546875, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 204155532, + "step": 2105 + }, + { + "epoch": 0.2059053578412202, + "grad_norm": 19.206296508606297, + "learning_rate": 5e-05, + "loss": 0.1241, + "num_input_tokens_seen": 204252200, + "step": 2106 + }, + { + "epoch": 0.2059053578412202, + "loss": 0.13891485333442688, + "loss_ce": 0.00964239239692688, + "loss_iou": 0.3515625, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 204252200, + "step": 2106 + }, + { + "epoch": 0.20600312866640594, + "grad_norm": 17.01536565015297, + "learning_rate": 5e-05, + "loss": 0.1116, + "num_input_tokens_seen": 204350232, + "step": 2107 + }, + { + "epoch": 0.20600312866640594, + "loss": 0.07573558390140533, + "loss_ce": 0.0019288205076009035, + "loss_iou": 0.36328125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 204350232, + "step": 2107 + }, + { + "epoch": 0.20610089949159172, + "grad_norm": 6.121981240522727, + "learning_rate": 5e-05, + "loss": 0.0905, + "num_input_tokens_seen": 204447568, + "step": 2108 + }, + { + "epoch": 0.20610089949159172, + "loss": 0.1180562898516655, + "loss_ce": 0.00962732546031475, + "loss_iou": 0.326171875, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 204447568, + "step": 2108 + }, + { + "epoch": 0.20619867031677747, + "grad_norm": 20.29444210208059, + "learning_rate": 5e-05, + "loss": 0.1241, + "num_input_tokens_seen": 204544288, + "step": 2109 + }, + { + "epoch": 0.20619867031677747, + "loss": 0.13833680748939514, + "loss_ce": 0.004334114491939545, + "loss_iou": 0.265625, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 204544288, + "step": 2109 + }, + { + "epoch": 0.20629644114196324, + "grad_norm": 11.885924129050194, + "learning_rate": 5e-05, + "loss": 0.1237, + "num_input_tokens_seen": 204641704, + "step": 2110 + }, + { + "epoch": 0.20629644114196324, + "loss": 0.11355330049991608, + "loss_ce": 0.002988115418702364, + "loss_iou": 0.365234375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 204641704, + "step": 2110 + }, + { + "epoch": 0.206394211967149, + "grad_norm": 8.096323328782345, + "learning_rate": 5e-05, + "loss": 0.1298, + "num_input_tokens_seen": 204737924, + "step": 2111 + }, + { + "epoch": 0.206394211967149, + "loss": 0.15818363428115845, + "loss_ce": 0.005076948553323746, + "loss_iou": 0.35546875, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 204737924, + "step": 2111 + }, + { + "epoch": 0.20649198279233477, + "grad_norm": 30.012338566581857, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 204833724, + "step": 2112 + }, + { + "epoch": 0.20649198279233477, + "loss": 0.08630149811506271, + "loss_ce": 0.0018822482088580728, + "loss_iou": 0.359375, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 204833724, + "step": 2112 + }, + { + "epoch": 0.20658975361752052, + "grad_norm": 4.354923384143403, + "learning_rate": 5e-05, + "loss": 0.1132, + "num_input_tokens_seen": 204929576, + "step": 2113 + }, + { + "epoch": 0.20658975361752052, + "loss": 0.12723544239997864, + "loss_ce": 0.010597260668873787, + "loss_iou": 0.212890625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 204929576, + "step": 2113 + }, + { + "epoch": 0.2066875244427063, + "grad_norm": 4.172269434184279, + "learning_rate": 5e-05, + "loss": 0.064, + "num_input_tokens_seen": 205026628, + "step": 2114 + }, + { + "epoch": 0.2066875244427063, + "loss": 0.07458128035068512, + "loss_ce": 0.007976663298904896, + "loss_iou": 0.3359375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 205026628, + "step": 2114 + }, + { + "epoch": 0.20678529526789205, + "grad_norm": 10.215000678839269, + "learning_rate": 5e-05, + "loss": 0.1068, + "num_input_tokens_seen": 205123744, + "step": 2115 + }, + { + "epoch": 0.20678529526789205, + "loss": 0.09451919794082642, + "loss_ce": 0.00701004546135664, + "loss_iou": 0.3828125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 205123744, + "step": 2115 + }, + { + "epoch": 0.20688306609307783, + "grad_norm": 15.499436047809528, + "learning_rate": 5e-05, + "loss": 0.1024, + "num_input_tokens_seen": 205219664, + "step": 2116 + }, + { + "epoch": 0.20688306609307783, + "loss": 0.10926354676485062, + "loss_ce": 0.008570796810090542, + "loss_iou": 0.37890625, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 205219664, + "step": 2116 + }, + { + "epoch": 0.20698083691826358, + "grad_norm": 10.543028907720572, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 205316836, + "step": 2117 + }, + { + "epoch": 0.20698083691826358, + "loss": 0.08310731500387192, + "loss_ce": 0.0056575145572423935, + "loss_iou": 0.33984375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 205316836, + "step": 2117 + }, + { + "epoch": 0.20707860774344936, + "grad_norm": 5.713301717567878, + "learning_rate": 5e-05, + "loss": 0.1036, + "num_input_tokens_seen": 205413980, + "step": 2118 + }, + { + "epoch": 0.20707860774344936, + "loss": 0.11737009882926941, + "loss_ce": 0.002349349670112133, + "loss_iou": 0.291015625, + "loss_num": 0.02294921875, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 205413980, + "step": 2118 + }, + { + "epoch": 0.2071763785686351, + "grad_norm": 6.233730761098611, + "learning_rate": 5e-05, + "loss": 0.0977, + "num_input_tokens_seen": 205511908, + "step": 2119 + }, + { + "epoch": 0.2071763785686351, + "loss": 0.08558736741542816, + "loss_ce": 0.005341389682143927, + "loss_iou": 0.353515625, + "loss_num": 0.01611328125, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 205511908, + "step": 2119 + }, + { + "epoch": 0.20727414939382088, + "grad_norm": 3.209616325386072, + "learning_rate": 5e-05, + "loss": 0.0928, + "num_input_tokens_seen": 205608424, + "step": 2120 + }, + { + "epoch": 0.20727414939382088, + "loss": 0.092686228454113, + "loss_ce": 0.006161264143884182, + "loss_iou": 0.30859375, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 205608424, + "step": 2120 + }, + { + "epoch": 0.20737192021900666, + "grad_norm": 15.298957460033725, + "learning_rate": 5e-05, + "loss": 0.0996, + "num_input_tokens_seen": 205704920, + "step": 2121 + }, + { + "epoch": 0.20737192021900666, + "loss": 0.10866257548332214, + "loss_ce": 0.002942053135484457, + "loss_iou": 0.2216796875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 205704920, + "step": 2121 + }, + { + "epoch": 0.2074696910441924, + "grad_norm": 14.916137978745962, + "learning_rate": 5e-05, + "loss": 0.1067, + "num_input_tokens_seen": 205802692, + "step": 2122 + }, + { + "epoch": 0.2074696910441924, + "loss": 0.1462508887052536, + "loss_ce": 0.004069488495588303, + "loss_iou": 0.470703125, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 205802692, + "step": 2122 + }, + { + "epoch": 0.2075674618693782, + "grad_norm": 8.99549370487401, + "learning_rate": 5e-05, + "loss": 0.138, + "num_input_tokens_seen": 205900376, + "step": 2123 + }, + { + "epoch": 0.2075674618693782, + "loss": 0.13472013175487518, + "loss_ce": 0.005996979773044586, + "loss_iou": 0.38671875, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 205900376, + "step": 2123 + }, + { + "epoch": 0.20766523269456394, + "grad_norm": 12.50532016837238, + "learning_rate": 5e-05, + "loss": 0.1067, + "num_input_tokens_seen": 205997072, + "step": 2124 + }, + { + "epoch": 0.20766523269456394, + "loss": 0.0935935527086258, + "loss_ce": 0.00672526191920042, + "loss_iou": 0.4140625, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 205997072, + "step": 2124 + }, + { + "epoch": 0.20776300351974972, + "grad_norm": 22.300936859853444, + "learning_rate": 5e-05, + "loss": 0.1665, + "num_input_tokens_seen": 206094176, + "step": 2125 + }, + { + "epoch": 0.20776300351974972, + "loss": 0.2188163548707962, + "loss_ce": 0.007878856733441353, + "loss_iou": 0.4921875, + "loss_num": 0.042236328125, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 206094176, + "step": 2125 + }, + { + "epoch": 0.20786077434493547, + "grad_norm": 14.27805876695921, + "learning_rate": 5e-05, + "loss": 0.1606, + "num_input_tokens_seen": 206191380, + "step": 2126 + }, + { + "epoch": 0.20786077434493547, + "loss": 0.20020633935928345, + "loss_ce": 0.010142862796783447, + "loss_iou": 0.3984375, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 206191380, + "step": 2126 + }, + { + "epoch": 0.20795854517012125, + "grad_norm": 4.5561642871222015, + "learning_rate": 5e-05, + "loss": 0.0821, + "num_input_tokens_seen": 206287984, + "step": 2127 + }, + { + "epoch": 0.20795854517012125, + "loss": 0.09412746131420135, + "loss_ce": 0.0041082375682890415, + "loss_iou": 0.26171875, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 206287984, + "step": 2127 + }, + { + "epoch": 0.208056315995307, + "grad_norm": 18.4608038868935, + "learning_rate": 5e-05, + "loss": 0.1347, + "num_input_tokens_seen": 206385388, + "step": 2128 + }, + { + "epoch": 0.208056315995307, + "loss": 0.11989372968673706, + "loss_ce": 0.007070247549563646, + "loss_iou": 0.36328125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 206385388, + "step": 2128 + }, + { + "epoch": 0.20815408682049277, + "grad_norm": 20.41526396514624, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 206482884, + "step": 2129 + }, + { + "epoch": 0.20815408682049277, + "loss": 0.06840695440769196, + "loss_ce": 0.004106417298316956, + "loss_iou": 0.41015625, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 206482884, + "step": 2129 + }, + { + "epoch": 0.20825185764567852, + "grad_norm": 14.35896473504326, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 206580152, + "step": 2130 + }, + { + "epoch": 0.20825185764567852, + "loss": 0.07460849732160568, + "loss_ce": 0.00708835618570447, + "loss_iou": 0.3828125, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 206580152, + "step": 2130 + }, + { + "epoch": 0.2083496284708643, + "grad_norm": 13.841320002456637, + "learning_rate": 5e-05, + "loss": 0.1354, + "num_input_tokens_seen": 206677508, + "step": 2131 + }, + { + "epoch": 0.2083496284708643, + "loss": 0.14029309153556824, + "loss_ce": 0.00583263672888279, + "loss_iou": 0.384765625, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 206677508, + "step": 2131 + }, + { + "epoch": 0.20844739929605005, + "grad_norm": 14.964695853518457, + "learning_rate": 5e-05, + "loss": 0.1357, + "num_input_tokens_seen": 206773684, + "step": 2132 + }, + { + "epoch": 0.20844739929605005, + "loss": 0.15609188377857208, + "loss_ce": 0.0046941787004470825, + "loss_iou": 0.361328125, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 206773684, + "step": 2132 + }, + { + "epoch": 0.20854517012123583, + "grad_norm": 9.460997418560343, + "learning_rate": 5e-05, + "loss": 0.0862, + "num_input_tokens_seen": 206871088, + "step": 2133 + }, + { + "epoch": 0.20854517012123583, + "loss": 0.10480619966983795, + "loss_ce": 0.0037319764960557222, + "loss_iou": 0.33984375, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 206871088, + "step": 2133 + }, + { + "epoch": 0.20864294094642158, + "grad_norm": 8.16670493073192, + "learning_rate": 5e-05, + "loss": 0.0979, + "num_input_tokens_seen": 206968396, + "step": 2134 + }, + { + "epoch": 0.20864294094642158, + "loss": 0.10311224311590195, + "loss_ce": 0.0049677155911922455, + "loss_iou": 0.423828125, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 206968396, + "step": 2134 + }, + { + "epoch": 0.20874071177160736, + "grad_norm": 8.747035197968053, + "learning_rate": 5e-05, + "loss": 0.1096, + "num_input_tokens_seen": 207064860, + "step": 2135 + }, + { + "epoch": 0.20874071177160736, + "loss": 0.10411618649959564, + "loss_ce": 0.006368381902575493, + "loss_iou": 0.298828125, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 207064860, + "step": 2135 + }, + { + "epoch": 0.2088384825967931, + "grad_norm": 3.7397796846299776, + "learning_rate": 5e-05, + "loss": 0.0729, + "num_input_tokens_seen": 207162160, + "step": 2136 + }, + { + "epoch": 0.2088384825967931, + "loss": 0.08570932596921921, + "loss_ce": 0.0027015109080821276, + "loss_iou": 0.408203125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 207162160, + "step": 2136 + }, + { + "epoch": 0.20893625342197888, + "grad_norm": 6.215186351238999, + "learning_rate": 5e-05, + "loss": 0.0946, + "num_input_tokens_seen": 207259100, + "step": 2137 + }, + { + "epoch": 0.20893625342197888, + "loss": 0.06633955240249634, + "loss_ce": 0.004221022129058838, + "loss_iou": 0.296875, + "loss_num": 0.012451171875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 207259100, + "step": 2137 + }, + { + "epoch": 0.20903402424716463, + "grad_norm": 10.848822697820118, + "learning_rate": 5e-05, + "loss": 0.1122, + "num_input_tokens_seen": 207356528, + "step": 2138 + }, + { + "epoch": 0.20903402424716463, + "loss": 0.09412755072116852, + "loss_ce": 0.0032156906090676785, + "loss_iou": 0.298828125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 207356528, + "step": 2138 + }, + { + "epoch": 0.2091317950723504, + "grad_norm": 26.083726881016286, + "learning_rate": 5e-05, + "loss": 0.1027, + "num_input_tokens_seen": 207452928, + "step": 2139 + }, + { + "epoch": 0.2091317950723504, + "loss": 0.10514713078737259, + "loss_ce": 0.004591711796820164, + "loss_iou": 0.33984375, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 207452928, + "step": 2139 + }, + { + "epoch": 0.20922956589753616, + "grad_norm": 7.936387611150584, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 207550240, + "step": 2140 + }, + { + "epoch": 0.20922956589753616, + "loss": 0.06905511021614075, + "loss_ce": 0.002946407301351428, + "loss_iou": 0.4296875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 207550240, + "step": 2140 + }, + { + "epoch": 0.20932733672272194, + "grad_norm": 7.1132053337556105, + "learning_rate": 5e-05, + "loss": 0.1105, + "num_input_tokens_seen": 207647536, + "step": 2141 + }, + { + "epoch": 0.20932733672272194, + "loss": 0.1466366946697235, + "loss_ce": 0.005065649747848511, + "loss_iou": 0.3984375, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 207647536, + "step": 2141 + }, + { + "epoch": 0.2094251075479077, + "grad_norm": 5.7244249399677845, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 207744556, + "step": 2142 + }, + { + "epoch": 0.2094251075479077, + "loss": 0.09382496029138565, + "loss_ce": 0.0031724958680570126, + "loss_iou": 0.390625, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 207744556, + "step": 2142 + }, + { + "epoch": 0.20952287837309347, + "grad_norm": 4.551013328294836, + "learning_rate": 5e-05, + "loss": 0.1454, + "num_input_tokens_seen": 207841640, + "step": 2143 + }, + { + "epoch": 0.20952287837309347, + "loss": 0.06954880058765411, + "loss_ce": 0.004843901377171278, + "loss_iou": 0.35546875, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 207841640, + "step": 2143 + }, + { + "epoch": 0.20962064919827925, + "grad_norm": 5.340463510447533, + "learning_rate": 5e-05, + "loss": 0.111, + "num_input_tokens_seen": 207939044, + "step": 2144 + }, + { + "epoch": 0.20962064919827925, + "loss": 0.10955461859703064, + "loss_ce": 0.002971985377371311, + "loss_iou": 0.296875, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 207939044, + "step": 2144 + }, + { + "epoch": 0.209718420023465, + "grad_norm": 6.98799674279691, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 208035148, + "step": 2145 + }, + { + "epoch": 0.209718420023465, + "loss": 0.04661654680967331, + "loss_ce": 0.0045690457336604595, + "loss_iou": 0.27734375, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 208035148, + "step": 2145 + }, + { + "epoch": 0.20981619084865077, + "grad_norm": 14.776910289028976, + "learning_rate": 5e-05, + "loss": 0.1043, + "num_input_tokens_seen": 208132176, + "step": 2146 + }, + { + "epoch": 0.20981619084865077, + "loss": 0.08833254873752594, + "loss_ce": 0.007903472520411015, + "loss_iou": 0.42578125, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 208132176, + "step": 2146 + }, + { + "epoch": 0.20991396167383652, + "grad_norm": 43.80169246722408, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 208229124, + "step": 2147 + }, + { + "epoch": 0.20991396167383652, + "loss": 0.10031433403491974, + "loss_ce": 0.007945251651108265, + "loss_iou": 0.421875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 208229124, + "step": 2147 + }, + { + "epoch": 0.2100117324990223, + "grad_norm": 12.01578632176669, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 208325840, + "step": 2148 + }, + { + "epoch": 0.2100117324990223, + "loss": 0.08917412161827087, + "loss_ce": 0.0059450604021549225, + "loss_iou": 0.31640625, + "loss_num": 0.0167236328125, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 208325840, + "step": 2148 + }, + { + "epoch": 0.21010950332420805, + "grad_norm": 15.307451232569832, + "learning_rate": 5e-05, + "loss": 0.1121, + "num_input_tokens_seen": 208423652, + "step": 2149 + }, + { + "epoch": 0.21010950332420805, + "loss": 0.08075103163719177, + "loss_ce": 0.008164970204234123, + "loss_iou": 0.365234375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 208423652, + "step": 2149 + }, + { + "epoch": 0.21020727414939383, + "grad_norm": 5.78885097862011, + "learning_rate": 5e-05, + "loss": 0.0978, + "num_input_tokens_seen": 208519944, + "step": 2150 + }, + { + "epoch": 0.21020727414939383, + "loss": 0.08260875195264816, + "loss_ce": 0.008267928846180439, + "loss_iou": 0.375, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 208519944, + "step": 2150 + }, + { + "epoch": 0.21030504497457958, + "grad_norm": 9.512034374597766, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 208617140, + "step": 2151 + }, + { + "epoch": 0.21030504497457958, + "loss": 0.11079776287078857, + "loss_ce": 0.009387854486703873, + "loss_iou": 0.455078125, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 208617140, + "step": 2151 + }, + { + "epoch": 0.21040281579976536, + "grad_norm": 8.504791084829016, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 208713720, + "step": 2152 + }, + { + "epoch": 0.21040281579976536, + "loss": 0.08975784480571747, + "loss_ce": 0.005849761888384819, + "loss_iou": 0.341796875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 208713720, + "step": 2152 + }, + { + "epoch": 0.2105005866249511, + "grad_norm": 7.409395005976063, + "learning_rate": 5e-05, + "loss": 0.1036, + "num_input_tokens_seen": 208811020, + "step": 2153 + }, + { + "epoch": 0.2105005866249511, + "loss": 0.09793920814990997, + "loss_ce": 0.0036551556549966335, + "loss_iou": 0.30859375, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 208811020, + "step": 2153 + }, + { + "epoch": 0.21059835745013689, + "grad_norm": 8.595686167892632, + "learning_rate": 5e-05, + "loss": 0.1127, + "num_input_tokens_seen": 208908208, + "step": 2154 + }, + { + "epoch": 0.21059835745013689, + "loss": 0.11939633637666702, + "loss_ce": 0.006435525603592396, + "loss_iou": 0.41796875, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 208908208, + "step": 2154 + }, + { + "epoch": 0.21069612827532264, + "grad_norm": 10.004329205719747, + "learning_rate": 5e-05, + "loss": 0.1113, + "num_input_tokens_seen": 209004956, + "step": 2155 + }, + { + "epoch": 0.21069612827532264, + "loss": 0.11431269347667694, + "loss_ce": 0.0052581243216991425, + "loss_iou": 0.28515625, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 209004956, + "step": 2155 + }, + { + "epoch": 0.21079389910050841, + "grad_norm": 34.03203242291048, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 209102744, + "step": 2156 + }, + { + "epoch": 0.21079389910050841, + "loss": 0.07814308255910873, + "loss_ce": 0.0025662987027317286, + "loss_iou": 0.369140625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 209102744, + "step": 2156 + }, + { + "epoch": 0.21089166992569416, + "grad_norm": 6.649595789021755, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 209200112, + "step": 2157 + }, + { + "epoch": 0.21089166992569416, + "loss": 0.09383373707532883, + "loss_ce": 0.002509881742298603, + "loss_iou": 0.2734375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 209200112, + "step": 2157 + }, + { + "epoch": 0.21098944075087994, + "grad_norm": 3.5242756046657187, + "learning_rate": 5e-05, + "loss": 0.0963, + "num_input_tokens_seen": 209296804, + "step": 2158 + }, + { + "epoch": 0.21098944075087994, + "loss": 0.08254374563694, + "loss_ce": 0.008538622409105301, + "loss_iou": 0.294921875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 209296804, + "step": 2158 + }, + { + "epoch": 0.2110872115760657, + "grad_norm": 15.859873490494271, + "learning_rate": 5e-05, + "loss": 0.158, + "num_input_tokens_seen": 209394284, + "step": 2159 + }, + { + "epoch": 0.2110872115760657, + "loss": 0.16856583952903748, + "loss_ce": 0.008379077538847923, + "loss_iou": 0.40234375, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 209394284, + "step": 2159 + }, + { + "epoch": 0.21118498240125147, + "grad_norm": 19.305697456367767, + "learning_rate": 5e-05, + "loss": 0.1105, + "num_input_tokens_seen": 209492008, + "step": 2160 + }, + { + "epoch": 0.21118498240125147, + "loss": 0.12148672342300415, + "loss_ce": 0.0076561616733670235, + "loss_iou": 0.373046875, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 209492008, + "step": 2160 + }, + { + "epoch": 0.21128275322643722, + "grad_norm": 9.824489803789213, + "learning_rate": 5e-05, + "loss": 0.1198, + "num_input_tokens_seen": 209589368, + "step": 2161 + }, + { + "epoch": 0.21128275322643722, + "loss": 0.1254253387451172, + "loss_ce": 0.0020122569985687733, + "loss_iou": 0.416015625, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 209589368, + "step": 2161 + }, + { + "epoch": 0.211380524051623, + "grad_norm": 11.272988497234033, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 209686944, + "step": 2162 + }, + { + "epoch": 0.211380524051623, + "loss": 0.05633874237537384, + "loss_ce": 0.005343870259821415, + "loss_iou": 0.310546875, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 209686944, + "step": 2162 + }, + { + "epoch": 0.21147829487680875, + "grad_norm": 14.625548913752592, + "learning_rate": 5e-05, + "loss": 0.1345, + "num_input_tokens_seen": 209784752, + "step": 2163 + }, + { + "epoch": 0.21147829487680875, + "loss": 0.11850197613239288, + "loss_ce": 0.00802833866328001, + "loss_iou": 0.4375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 209784752, + "step": 2163 + }, + { + "epoch": 0.21157606570199453, + "grad_norm": 13.886708867148144, + "learning_rate": 5e-05, + "loss": 0.1299, + "num_input_tokens_seen": 209880612, + "step": 2164 + }, + { + "epoch": 0.21157606570199453, + "loss": 0.12211838364601135, + "loss_ce": 0.006243141833692789, + "loss_iou": 0.443359375, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 209880612, + "step": 2164 + }, + { + "epoch": 0.21167383652718028, + "grad_norm": 5.912089271155677, + "learning_rate": 5e-05, + "loss": 0.1255, + "num_input_tokens_seen": 209977384, + "step": 2165 + }, + { + "epoch": 0.21167383652718028, + "loss": 0.1156449168920517, + "loss_ce": 0.005720603279769421, + "loss_iou": 0.328125, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 209977384, + "step": 2165 + }, + { + "epoch": 0.21177160735236605, + "grad_norm": 4.884836613173108, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 210073820, + "step": 2166 + }, + { + "epoch": 0.21177160735236605, + "loss": 0.08267394453287125, + "loss_ce": 0.005819238722324371, + "loss_iou": 0.34765625, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 210073820, + "step": 2166 + }, + { + "epoch": 0.21186937817755183, + "grad_norm": 6.405747355602735, + "learning_rate": 5e-05, + "loss": 0.1054, + "num_input_tokens_seen": 210170212, + "step": 2167 + }, + { + "epoch": 0.21186937817755183, + "loss": 0.09999337792396545, + "loss_ce": 0.00501503748819232, + "loss_iou": 0.30078125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 210170212, + "step": 2167 + }, + { + "epoch": 0.21196714900273758, + "grad_norm": 8.144013645684558, + "learning_rate": 5e-05, + "loss": 0.0651, + "num_input_tokens_seen": 210266852, + "step": 2168 + }, + { + "epoch": 0.21196714900273758, + "loss": 0.05704907327890396, + "loss_ce": 0.005474366247653961, + "loss_iou": 0.337890625, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 210266852, + "step": 2168 + }, + { + "epoch": 0.21206491982792336, + "grad_norm": 11.6450365094241, + "learning_rate": 5e-05, + "loss": 0.1075, + "num_input_tokens_seen": 210364276, + "step": 2169 + }, + { + "epoch": 0.21206491982792336, + "loss": 0.1373121738433838, + "loss_ce": 0.002485510427504778, + "loss_iou": 0.3515625, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 210364276, + "step": 2169 + }, + { + "epoch": 0.2121626906531091, + "grad_norm": 4.8272208554880285, + "learning_rate": 5e-05, + "loss": 0.0991, + "num_input_tokens_seen": 210461160, + "step": 2170 + }, + { + "epoch": 0.2121626906531091, + "loss": 0.08788859844207764, + "loss_ce": 0.006345629692077637, + "loss_iou": 0.333984375, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 210461160, + "step": 2170 + }, + { + "epoch": 0.2122604614782949, + "grad_norm": 6.667607943095278, + "learning_rate": 5e-05, + "loss": 0.1635, + "num_input_tokens_seen": 210558244, + "step": 2171 + }, + { + "epoch": 0.2122604614782949, + "loss": 0.14196962118148804, + "loss_ce": 0.009324977174401283, + "loss_iou": 0.3203125, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 210558244, + "step": 2171 + }, + { + "epoch": 0.21235823230348064, + "grad_norm": 10.090709783185737, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 210655744, + "step": 2172 + }, + { + "epoch": 0.21235823230348064, + "loss": 0.11199714243412018, + "loss_ce": 0.002530585043132305, + "loss_iou": 0.36328125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 210655744, + "step": 2172 + }, + { + "epoch": 0.21245600312866642, + "grad_norm": 11.472199774826827, + "learning_rate": 5e-05, + "loss": 0.1461, + "num_input_tokens_seen": 210752976, + "step": 2173 + }, + { + "epoch": 0.21245600312866642, + "loss": 0.18648141622543335, + "loss_ce": 0.0074042645283043385, + "loss_iou": 0.322265625, + "loss_num": 0.035888671875, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 210752976, + "step": 2173 + }, + { + "epoch": 0.21255377395385217, + "grad_norm": 7.451689178853859, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 210849460, + "step": 2174 + }, + { + "epoch": 0.21255377395385217, + "loss": 0.05357125401496887, + "loss_ce": 0.003976376727223396, + "loss_iou": 0.2734375, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 210849460, + "step": 2174 + }, + { + "epoch": 0.21265154477903794, + "grad_norm": 4.216050354676405, + "learning_rate": 5e-05, + "loss": 0.1034, + "num_input_tokens_seen": 210947036, + "step": 2175 + }, + { + "epoch": 0.21265154477903794, + "loss": 0.05468318611383438, + "loss_ce": 0.004924274515360594, + "loss_iou": 0.4375, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 210947036, + "step": 2175 + }, + { + "epoch": 0.2127493156042237, + "grad_norm": 6.096358479012206, + "learning_rate": 5e-05, + "loss": 0.0971, + "num_input_tokens_seen": 211044208, + "step": 2176 + }, + { + "epoch": 0.2127493156042237, + "loss": 0.0904792994260788, + "loss_ce": 0.00582353537902236, + "loss_iou": 0.38671875, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 211044208, + "step": 2176 + }, + { + "epoch": 0.21284708642940947, + "grad_norm": 8.444539065988296, + "learning_rate": 5e-05, + "loss": 0.1266, + "num_input_tokens_seen": 211141620, + "step": 2177 + }, + { + "epoch": 0.21284708642940947, + "loss": 0.1520860195159912, + "loss_ce": 0.006257766392081976, + "loss_iou": 0.2021484375, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 211141620, + "step": 2177 + }, + { + "epoch": 0.21294485725459522, + "grad_norm": 25.03510445112485, + "learning_rate": 5e-05, + "loss": 0.1046, + "num_input_tokens_seen": 211238156, + "step": 2178 + }, + { + "epoch": 0.21294485725459522, + "loss": 0.087120920419693, + "loss_ce": 0.004059700295329094, + "loss_iou": 0.353515625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 211238156, + "step": 2178 + }, + { + "epoch": 0.213042628079781, + "grad_norm": 20.9306321574481, + "learning_rate": 5e-05, + "loss": 0.1494, + "num_input_tokens_seen": 211334924, + "step": 2179 + }, + { + "epoch": 0.213042628079781, + "loss": 0.1925870180130005, + "loss_ce": 0.007986189797520638, + "loss_iou": 0.435546875, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 211334924, + "step": 2179 + }, + { + "epoch": 0.21314039890496675, + "grad_norm": 25.70261478323782, + "learning_rate": 5e-05, + "loss": 0.1223, + "num_input_tokens_seen": 211431904, + "step": 2180 + }, + { + "epoch": 0.21314039890496675, + "loss": 0.08587564527988434, + "loss_ce": 0.005599156022071838, + "loss_iou": 0.408203125, + "loss_num": 0.01611328125, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 211431904, + "step": 2180 + }, + { + "epoch": 0.21323816973015253, + "grad_norm": 21.350266258870636, + "learning_rate": 5e-05, + "loss": 0.1371, + "num_input_tokens_seen": 211528820, + "step": 2181 + }, + { + "epoch": 0.21323816973015253, + "loss": 0.11939091235399246, + "loss_ce": 0.00591893307864666, + "loss_iou": 0.3359375, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 211528820, + "step": 2181 + }, + { + "epoch": 0.21333594055533828, + "grad_norm": 4.978248611451721, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 211625544, + "step": 2182 + }, + { + "epoch": 0.21333594055533828, + "loss": 0.08264923840761185, + "loss_ce": 0.005058293230831623, + "loss_iou": 0.326171875, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 211625544, + "step": 2182 + }, + { + "epoch": 0.21343371138052405, + "grad_norm": 11.061065506634568, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 211723584, + "step": 2183 + }, + { + "epoch": 0.21343371138052405, + "loss": 0.10347042977809906, + "loss_ce": 0.006165135186165571, + "loss_iou": 0.4453125, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 211723584, + "step": 2183 + }, + { + "epoch": 0.2135314822057098, + "grad_norm": 10.760161842996633, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 211820608, + "step": 2184 + }, + { + "epoch": 0.2135314822057098, + "loss": 0.052994344383478165, + "loss_ce": 0.0019842111505568027, + "loss_iou": 0.47265625, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 211820608, + "step": 2184 + }, + { + "epoch": 0.21362925303089558, + "grad_norm": 7.553978325978372, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 211918472, + "step": 2185 + }, + { + "epoch": 0.21362925303089558, + "loss": 0.0976191982626915, + "loss_ce": 0.0025264231953769922, + "loss_iou": 0.37109375, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 211918472, + "step": 2185 + }, + { + "epoch": 0.21372702385608133, + "grad_norm": 9.638613333809571, + "learning_rate": 5e-05, + "loss": 0.1002, + "num_input_tokens_seen": 212014780, + "step": 2186 + }, + { + "epoch": 0.21372702385608133, + "loss": 0.09305053949356079, + "loss_ce": 0.003244937863200903, + "loss_iou": 0.1796875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 212014780, + "step": 2186 + }, + { + "epoch": 0.2138247946812671, + "grad_norm": 14.582054715990639, + "learning_rate": 5e-05, + "loss": 0.1253, + "num_input_tokens_seen": 212111904, + "step": 2187 + }, + { + "epoch": 0.2138247946812671, + "loss": 0.1594332754611969, + "loss_ce": 0.004281916189938784, + "loss_iou": 0.46484375, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 212111904, + "step": 2187 + }, + { + "epoch": 0.21392256550645286, + "grad_norm": 11.346636966509475, + "learning_rate": 5e-05, + "loss": 0.0973, + "num_input_tokens_seen": 212209684, + "step": 2188 + }, + { + "epoch": 0.21392256550645286, + "loss": 0.08181044459342957, + "loss_ce": 0.005272363778203726, + "loss_iou": 0.392578125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 212209684, + "step": 2188 + }, + { + "epoch": 0.21402033633163864, + "grad_norm": 9.034377329668766, + "learning_rate": 5e-05, + "loss": 0.1137, + "num_input_tokens_seen": 212305824, + "step": 2189 + }, + { + "epoch": 0.21402033633163864, + "loss": 0.14488232135772705, + "loss_ce": 0.005188110750168562, + "loss_iou": 0.291015625, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 212305824, + "step": 2189 + }, + { + "epoch": 0.21411810715682442, + "grad_norm": 17.236816705658477, + "learning_rate": 5e-05, + "loss": 0.1042, + "num_input_tokens_seen": 212402436, + "step": 2190 + }, + { + "epoch": 0.21411810715682442, + "loss": 0.12909317016601562, + "loss_ce": 0.006565088871866465, + "loss_iou": 0.298828125, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 212402436, + "step": 2190 + }, + { + "epoch": 0.21421587798201017, + "grad_norm": 7.679441067531924, + "learning_rate": 5e-05, + "loss": 0.0942, + "num_input_tokens_seen": 212499140, + "step": 2191 + }, + { + "epoch": 0.21421587798201017, + "loss": 0.09268036484718323, + "loss_ce": 0.0077575682662427425, + "loss_iou": 0.328125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 212499140, + "step": 2191 + }, + { + "epoch": 0.21431364880719594, + "grad_norm": 3.6111178554218935, + "learning_rate": 5e-05, + "loss": 0.097, + "num_input_tokens_seen": 212596628, + "step": 2192 + }, + { + "epoch": 0.21431364880719594, + "loss": 0.10704203695058823, + "loss_ce": 0.004930220544338226, + "loss_iou": 0.359375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 212596628, + "step": 2192 + }, + { + "epoch": 0.2144114196323817, + "grad_norm": 2.376012878307277, + "learning_rate": 5e-05, + "loss": 0.1067, + "num_input_tokens_seen": 212695584, + "step": 2193 + }, + { + "epoch": 0.2144114196323817, + "loss": 0.10734604299068451, + "loss_ce": 0.006607512943446636, + "loss_iou": 0.40234375, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 212695584, + "step": 2193 + }, + { + "epoch": 0.21450919045756747, + "grad_norm": 7.726990875115108, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 212792136, + "step": 2194 + }, + { + "epoch": 0.21450919045756747, + "loss": 0.0823339894413948, + "loss_ce": 0.003713077399879694, + "loss_iou": 0.2890625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 212792136, + "step": 2194 + }, + { + "epoch": 0.21460696128275322, + "grad_norm": 9.05412188211594, + "learning_rate": 5e-05, + "loss": 0.1015, + "num_input_tokens_seen": 212888440, + "step": 2195 + }, + { + "epoch": 0.21460696128275322, + "loss": 0.11906842887401581, + "loss_ce": 0.0017130814958363771, + "loss_iou": 0.345703125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 212888440, + "step": 2195 + }, + { + "epoch": 0.214704732107939, + "grad_norm": 12.79985117330287, + "learning_rate": 5e-05, + "loss": 0.0838, + "num_input_tokens_seen": 212984912, + "step": 2196 + }, + { + "epoch": 0.214704732107939, + "loss": 0.10827437043190002, + "loss_ce": 0.009092242456972599, + "loss_iou": 0.3671875, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 212984912, + "step": 2196 + }, + { + "epoch": 0.21480250293312475, + "grad_norm": 5.428062895633773, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 213081996, + "step": 2197 + }, + { + "epoch": 0.21480250293312475, + "loss": 0.03796558082103729, + "loss_ce": 0.00287036644294858, + "loss_iou": 0.283203125, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 213081996, + "step": 2197 + }, + { + "epoch": 0.21490027375831053, + "grad_norm": 5.968871556646486, + "learning_rate": 5e-05, + "loss": 0.1032, + "num_input_tokens_seen": 213179280, + "step": 2198 + }, + { + "epoch": 0.21490027375831053, + "loss": 0.08797451853752136, + "loss_ce": 0.006965603679418564, + "loss_iou": 0.376953125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 213179280, + "step": 2198 + }, + { + "epoch": 0.21499804458349628, + "grad_norm": 10.568860248112166, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 213276220, + "step": 2199 + }, + { + "epoch": 0.21499804458349628, + "loss": 0.09244821965694427, + "loss_ce": 0.0025205453857779503, + "loss_iou": 0.3671875, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 213276220, + "step": 2199 + }, + { + "epoch": 0.21509581540868206, + "grad_norm": 9.978803000026833, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 213373072, + "step": 2200 + }, + { + "epoch": 0.21509581540868206, + "loss": 0.09003578126430511, + "loss_ce": 0.0043424200266599655, + "loss_iou": 0.30859375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 213373072, + "step": 2200 + }, + { + "epoch": 0.2151935862338678, + "grad_norm": 7.362160359893551, + "learning_rate": 5e-05, + "loss": 0.1041, + "num_input_tokens_seen": 213469996, + "step": 2201 + }, + { + "epoch": 0.2151935862338678, + "loss": 0.1448751538991928, + "loss_ce": 0.0023885793052613735, + "loss_iou": 0.39453125, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 213469996, + "step": 2201 + }, + { + "epoch": 0.21529135705905358, + "grad_norm": 9.840245106019584, + "learning_rate": 5e-05, + "loss": 0.1111, + "num_input_tokens_seen": 213567700, + "step": 2202 + }, + { + "epoch": 0.21529135705905358, + "loss": 0.14994490146636963, + "loss_ce": 0.004253981169313192, + "loss_iou": 0.33203125, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 213567700, + "step": 2202 + }, + { + "epoch": 0.21538912788423933, + "grad_norm": 10.429241307831154, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 213664596, + "step": 2203 + }, + { + "epoch": 0.21538912788423933, + "loss": 0.09738047420978546, + "loss_ce": 0.005049541592597961, + "loss_iou": 0.3515625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 213664596, + "step": 2203 + }, + { + "epoch": 0.2154868987094251, + "grad_norm": 4.717815409833799, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 213762196, + "step": 2204 + }, + { + "epoch": 0.2154868987094251, + "loss": 0.05688994750380516, + "loss_ce": 0.0028128016274422407, + "loss_iou": 0.31640625, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 213762196, + "step": 2204 + }, + { + "epoch": 0.21558466953461086, + "grad_norm": 10.771124859435693, + "learning_rate": 5e-05, + "loss": 0.1055, + "num_input_tokens_seen": 213858800, + "step": 2205 + }, + { + "epoch": 0.21558466953461086, + "loss": 0.11025290936231613, + "loss_ce": 0.011627722531557083, + "loss_iou": 0.412109375, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 213858800, + "step": 2205 + }, + { + "epoch": 0.21568244035979664, + "grad_norm": 8.72134004815053, + "learning_rate": 5e-05, + "loss": 0.1001, + "num_input_tokens_seen": 213956596, + "step": 2206 + }, + { + "epoch": 0.21568244035979664, + "loss": 0.07786243408918381, + "loss_ce": 0.00523059768602252, + "loss_iou": 0.33203125, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 213956596, + "step": 2206 + }, + { + "epoch": 0.2157802111849824, + "grad_norm": 13.620103032147867, + "learning_rate": 5e-05, + "loss": 0.0929, + "num_input_tokens_seen": 214053208, + "step": 2207 + }, + { + "epoch": 0.2157802111849824, + "loss": 0.07426382601261139, + "loss_ce": 0.004088657908141613, + "loss_iou": 0.455078125, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 214053208, + "step": 2207 + }, + { + "epoch": 0.21587798201016817, + "grad_norm": 20.324352682396626, + "learning_rate": 5e-05, + "loss": 0.1026, + "num_input_tokens_seen": 214149532, + "step": 2208 + }, + { + "epoch": 0.21587798201016817, + "loss": 0.1023653969168663, + "loss_ce": 0.002572916680946946, + "loss_iou": 0.375, + "loss_num": 0.02001953125, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 214149532, + "step": 2208 + }, + { + "epoch": 0.21597575283535392, + "grad_norm": 13.510012966817438, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 214246064, + "step": 2209 + }, + { + "epoch": 0.21597575283535392, + "loss": 0.09171576797962189, + "loss_ce": 0.006113964132964611, + "loss_iou": 0.36328125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 214246064, + "step": 2209 + }, + { + "epoch": 0.2160735236605397, + "grad_norm": 8.049526026385882, + "learning_rate": 5e-05, + "loss": 0.0967, + "num_input_tokens_seen": 214342536, + "step": 2210 + }, + { + "epoch": 0.2160735236605397, + "loss": 0.10229594260454178, + "loss_ce": 0.00869852676987648, + "loss_iou": 0.365234375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 214342536, + "step": 2210 + }, + { + "epoch": 0.21617129448572545, + "grad_norm": 6.170531073533357, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 214439768, + "step": 2211 + }, + { + "epoch": 0.21617129448572545, + "loss": 0.08351585268974304, + "loss_ce": 0.002537451684474945, + "loss_iou": 0.318359375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 214439768, + "step": 2211 + }, + { + "epoch": 0.21626906531091122, + "grad_norm": 6.762240800135531, + "learning_rate": 5e-05, + "loss": 0.114, + "num_input_tokens_seen": 214535964, + "step": 2212 + }, + { + "epoch": 0.21626906531091122, + "loss": 0.09958689659833908, + "loss_ce": 0.0034374529495835304, + "loss_iou": 0.3984375, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 214535964, + "step": 2212 + }, + { + "epoch": 0.216366836136097, + "grad_norm": 10.981843343240932, + "learning_rate": 5e-05, + "loss": 0.0972, + "num_input_tokens_seen": 214633944, + "step": 2213 + }, + { + "epoch": 0.216366836136097, + "loss": 0.09645123779773712, + "loss_ce": 0.00523420050740242, + "loss_iou": 0.384765625, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 214633944, + "step": 2213 + }, + { + "epoch": 0.21646460696128275, + "grad_norm": 6.614465931649012, + "learning_rate": 5e-05, + "loss": 0.1078, + "num_input_tokens_seen": 214731384, + "step": 2214 + }, + { + "epoch": 0.21646460696128275, + "loss": 0.08630821108818054, + "loss_ce": 0.007633894216269255, + "loss_iou": 0.40625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 214731384, + "step": 2214 + }, + { + "epoch": 0.21656237778646853, + "grad_norm": 12.5970303990739, + "learning_rate": 5e-05, + "loss": 0.1035, + "num_input_tokens_seen": 214829000, + "step": 2215 + }, + { + "epoch": 0.21656237778646853, + "loss": 0.0961218997836113, + "loss_ce": 0.004447093699127436, + "loss_iou": 0.44140625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 214829000, + "step": 2215 + }, + { + "epoch": 0.21666014861165428, + "grad_norm": 9.868633598698981, + "learning_rate": 5e-05, + "loss": 0.0956, + "num_input_tokens_seen": 214926196, + "step": 2216 + }, + { + "epoch": 0.21666014861165428, + "loss": 0.09149390459060669, + "loss_ce": 0.0048239813186228275, + "loss_iou": 0.470703125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 214926196, + "step": 2216 + }, + { + "epoch": 0.21675791943684006, + "grad_norm": 7.9879040118336535, + "learning_rate": 5e-05, + "loss": 0.1128, + "num_input_tokens_seen": 215022604, + "step": 2217 + }, + { + "epoch": 0.21675791943684006, + "loss": 0.09240711480379105, + "loss_ce": 0.005355724599212408, + "loss_iou": 0.40234375, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 215022604, + "step": 2217 + }, + { + "epoch": 0.2168556902620258, + "grad_norm": 12.065538283233904, + "learning_rate": 5e-05, + "loss": 0.1009, + "num_input_tokens_seen": 215119704, + "step": 2218 + }, + { + "epoch": 0.2168556902620258, + "loss": 0.08389600366353989, + "loss_ce": 0.00088819011580199, + "loss_iou": 0.51953125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 215119704, + "step": 2218 + }, + { + "epoch": 0.21695346108721159, + "grad_norm": 14.088106035359216, + "learning_rate": 5e-05, + "loss": 0.0896, + "num_input_tokens_seen": 215217124, + "step": 2219 + }, + { + "epoch": 0.21695346108721159, + "loss": 0.08019266277551651, + "loss_ce": 0.007133581675589085, + "loss_iou": 0.376953125, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 215217124, + "step": 2219 + }, + { + "epoch": 0.21705123191239734, + "grad_norm": 11.703980273156034, + "learning_rate": 5e-05, + "loss": 0.0964, + "num_input_tokens_seen": 215314660, + "step": 2220 + }, + { + "epoch": 0.21705123191239734, + "loss": 0.0753755122423172, + "loss_ce": 0.004300070460885763, + "loss_iou": 0.404296875, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 215314660, + "step": 2220 + }, + { + "epoch": 0.2171490027375831, + "grad_norm": 8.276565650455941, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 215411868, + "step": 2221 + }, + { + "epoch": 0.2171490027375831, + "loss": 0.0796271413564682, + "loss_ce": 0.0032111206091940403, + "loss_iou": 0.3046875, + "loss_num": 0.01531982421875, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 215411868, + "step": 2221 + }, + { + "epoch": 0.21724677356276886, + "grad_norm": 7.6815137761577255, + "learning_rate": 5e-05, + "loss": 0.063, + "num_input_tokens_seen": 215508480, + "step": 2222 + }, + { + "epoch": 0.21724677356276886, + "loss": 0.06047192960977554, + "loss_ce": 0.0055708070285618305, + "loss_iou": 0.302734375, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 215508480, + "step": 2222 + }, + { + "epoch": 0.21734454438795464, + "grad_norm": 8.21673882436825, + "learning_rate": 5e-05, + "loss": 0.1241, + "num_input_tokens_seen": 215606228, + "step": 2223 + }, + { + "epoch": 0.21734454438795464, + "loss": 0.11414597928524017, + "loss_ce": 0.006541000213474035, + "loss_iou": 0.33984375, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 215606228, + "step": 2223 + }, + { + "epoch": 0.2174423152131404, + "grad_norm": 17.089468331542136, + "learning_rate": 5e-05, + "loss": 0.1008, + "num_input_tokens_seen": 215702636, + "step": 2224 + }, + { + "epoch": 0.2174423152131404, + "loss": 0.08664390444755554, + "loss_ce": 0.002781603019684553, + "loss_iou": 0.337890625, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 215702636, + "step": 2224 + }, + { + "epoch": 0.21754008603832617, + "grad_norm": 25.419145192710825, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 215798572, + "step": 2225 + }, + { + "epoch": 0.21754008603832617, + "loss": 0.08907735347747803, + "loss_ce": 0.00790059007704258, + "loss_iou": 0.357421875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 215798572, + "step": 2225 + }, + { + "epoch": 0.21763785686351192, + "grad_norm": 17.283377417503665, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 215896052, + "step": 2226 + }, + { + "epoch": 0.21763785686351192, + "loss": 0.0778266116976738, + "loss_ce": 0.0032416528556495905, + "loss_iou": 0.384765625, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 215896052, + "step": 2226 + }, + { + "epoch": 0.2177356276886977, + "grad_norm": 5.814760808974876, + "learning_rate": 5e-05, + "loss": 0.1035, + "num_input_tokens_seen": 215993604, + "step": 2227 + }, + { + "epoch": 0.2177356276886977, + "loss": 0.08230944722890854, + "loss_ce": 0.0047947997227311134, + "loss_iou": 0.39453125, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 215993604, + "step": 2227 + }, + { + "epoch": 0.21783339851388345, + "grad_norm": 13.216839558719768, + "learning_rate": 5e-05, + "loss": 0.1208, + "num_input_tokens_seen": 216089672, + "step": 2228 + }, + { + "epoch": 0.21783339851388345, + "loss": 0.15191376209259033, + "loss_ce": 0.004757999908179045, + "loss_iou": 0.345703125, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 216089672, + "step": 2228 + }, + { + "epoch": 0.21793116933906923, + "grad_norm": 25.953339393139746, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 216186292, + "step": 2229 + }, + { + "epoch": 0.21793116933906923, + "loss": 0.10630231350660324, + "loss_ce": 0.006067330949008465, + "loss_iou": 0.34375, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 216186292, + "step": 2229 + }, + { + "epoch": 0.21802894016425498, + "grad_norm": 19.74526242977487, + "learning_rate": 5e-05, + "loss": 0.1158, + "num_input_tokens_seen": 216283188, + "step": 2230 + }, + { + "epoch": 0.21802894016425498, + "loss": 0.14006832242012024, + "loss_ce": 0.010399125516414642, + "loss_iou": 0.2470703125, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 216283188, + "step": 2230 + }, + { + "epoch": 0.21812671098944075, + "grad_norm": 5.481629546988658, + "learning_rate": 5e-05, + "loss": 0.1151, + "num_input_tokens_seen": 216381284, + "step": 2231 + }, + { + "epoch": 0.21812671098944075, + "loss": 0.11227091401815414, + "loss_ce": 0.00867899414151907, + "loss_iou": 0.361328125, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 216381284, + "step": 2231 + }, + { + "epoch": 0.2182244818146265, + "grad_norm": 11.858248467412288, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 216478788, + "step": 2232 + }, + { + "epoch": 0.2182244818146265, + "loss": 0.11517494916915894, + "loss_ce": 0.004975976888090372, + "loss_iou": 0.5078125, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 216478788, + "step": 2232 + }, + { + "epoch": 0.21832225263981228, + "grad_norm": 49.572211557066325, + "learning_rate": 5e-05, + "loss": 0.0934, + "num_input_tokens_seen": 216575696, + "step": 2233 + }, + { + "epoch": 0.21832225263981228, + "loss": 0.1012267917394638, + "loss_ce": 0.006713848561048508, + "loss_iou": 0.375, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 216575696, + "step": 2233 + }, + { + "epoch": 0.21842002346499803, + "grad_norm": 10.00211031883579, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 216672684, + "step": 2234 + }, + { + "epoch": 0.21842002346499803, + "loss": 0.07231640815734863, + "loss_ce": 0.0030415039509534836, + "loss_iou": 0.400390625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 216672684, + "step": 2234 + }, + { + "epoch": 0.2185177942901838, + "grad_norm": 8.889699904245273, + "learning_rate": 5e-05, + "loss": 0.1285, + "num_input_tokens_seen": 216769972, + "step": 2235 + }, + { + "epoch": 0.2185177942901838, + "loss": 0.10068754851818085, + "loss_ce": 0.0023904317058622837, + "loss_iou": 0.400390625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 216769972, + "step": 2235 + }, + { + "epoch": 0.2186155651153696, + "grad_norm": 5.453390492411804, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 216866232, + "step": 2236 + }, + { + "epoch": 0.2186155651153696, + "loss": 0.11582903563976288, + "loss_ce": 0.007506891153752804, + "loss_iou": 0.3515625, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 216866232, + "step": 2236 + }, + { + "epoch": 0.21871333594055534, + "grad_norm": 3.382052934642813, + "learning_rate": 5e-05, + "loss": 0.1119, + "num_input_tokens_seen": 216964072, + "step": 2237 + }, + { + "epoch": 0.21871333594055534, + "loss": 0.12211018800735474, + "loss_ce": 0.005990804173052311, + "loss_iou": 0.4453125, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 216964072, + "step": 2237 + }, + { + "epoch": 0.21881110676574111, + "grad_norm": 3.2725616652832055, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 217060688, + "step": 2238 + }, + { + "epoch": 0.21881110676574111, + "loss": 0.0753217414021492, + "loss_ce": 0.006870813202112913, + "loss_iou": 0.353515625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 217060688, + "step": 2238 + }, + { + "epoch": 0.21890887759092686, + "grad_norm": 15.240023365069625, + "learning_rate": 5e-05, + "loss": 0.1304, + "num_input_tokens_seen": 217159344, + "step": 2239 + }, + { + "epoch": 0.21890887759092686, + "loss": 0.14792323112487793, + "loss_ce": 0.01068568043410778, + "loss_iou": 0.416015625, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 217159344, + "step": 2239 + }, + { + "epoch": 0.21900664841611264, + "grad_norm": 6.766623792816543, + "learning_rate": 5e-05, + "loss": 0.1003, + "num_input_tokens_seen": 217256128, + "step": 2240 + }, + { + "epoch": 0.21900664841611264, + "loss": 0.1276315450668335, + "loss_ce": 0.003531817113980651, + "loss_iou": 0.3125, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 217256128, + "step": 2240 + }, + { + "epoch": 0.2191044192412984, + "grad_norm": 8.754479025144686, + "learning_rate": 5e-05, + "loss": 0.1201, + "num_input_tokens_seen": 217353004, + "step": 2241 + }, + { + "epoch": 0.2191044192412984, + "loss": 0.11275724321603775, + "loss_ce": 0.005762615241110325, + "loss_iou": 0.306640625, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 217353004, + "step": 2241 + }, + { + "epoch": 0.21920219006648417, + "grad_norm": 12.449748891527298, + "learning_rate": 5e-05, + "loss": 0.0806, + "num_input_tokens_seen": 217448916, + "step": 2242 + }, + { + "epoch": 0.21920219006648417, + "loss": 0.07624885439872742, + "loss_ce": 0.007309651933610439, + "loss_iou": 0.271484375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 217448916, + "step": 2242 + }, + { + "epoch": 0.21929996089166992, + "grad_norm": 22.358915144719653, + "learning_rate": 5e-05, + "loss": 0.0867, + "num_input_tokens_seen": 217546212, + "step": 2243 + }, + { + "epoch": 0.21929996089166992, + "loss": 0.07463423907756805, + "loss_ce": 0.0021931398659944534, + "loss_iou": 0.330078125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 217546212, + "step": 2243 + }, + { + "epoch": 0.2193977317168557, + "grad_norm": 7.80984171838569, + "learning_rate": 5e-05, + "loss": 0.1164, + "num_input_tokens_seen": 217643664, + "step": 2244 + }, + { + "epoch": 0.2193977317168557, + "loss": 0.0901191309094429, + "loss_ce": 0.004425770603120327, + "loss_iou": 0.384765625, + "loss_num": 0.01708984375, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 217643664, + "step": 2244 + }, + { + "epoch": 0.21949550254204145, + "grad_norm": 13.869812285358194, + "learning_rate": 5e-05, + "loss": 0.0999, + "num_input_tokens_seen": 217741416, + "step": 2245 + }, + { + "epoch": 0.21949550254204145, + "loss": 0.09613160043954849, + "loss_ce": 0.0022900477051734924, + "loss_iou": 0.373046875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 217741416, + "step": 2245 + }, + { + "epoch": 0.21959327336722723, + "grad_norm": 11.568200730255766, + "learning_rate": 5e-05, + "loss": 0.1018, + "num_input_tokens_seen": 217838944, + "step": 2246 + }, + { + "epoch": 0.21959327336722723, + "loss": 0.0899549275636673, + "loss_ce": 0.004932953044772148, + "loss_iou": 0.578125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 217838944, + "step": 2246 + }, + { + "epoch": 0.21969104419241298, + "grad_norm": 5.579909401588832, + "learning_rate": 5e-05, + "loss": 0.1273, + "num_input_tokens_seen": 217936816, + "step": 2247 + }, + { + "epoch": 0.21969104419241298, + "loss": 0.10213036835193634, + "loss_ce": 0.0025667680893093348, + "loss_iou": 0.400390625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 217936816, + "step": 2247 + }, + { + "epoch": 0.21978881501759875, + "grad_norm": 5.697070351999774, + "learning_rate": 5e-05, + "loss": 0.1022, + "num_input_tokens_seen": 218032980, + "step": 2248 + }, + { + "epoch": 0.21978881501759875, + "loss": 0.08668562024831772, + "loss_ce": 0.002747019287198782, + "loss_iou": 0.345703125, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 218032980, + "step": 2248 + }, + { + "epoch": 0.2198865858427845, + "grad_norm": 6.670975347762841, + "learning_rate": 5e-05, + "loss": 0.0845, + "num_input_tokens_seen": 218129732, + "step": 2249 + }, + { + "epoch": 0.2198865858427845, + "loss": 0.0676519125699997, + "loss_ce": 0.004816217347979546, + "loss_iou": 0.458984375, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 218129732, + "step": 2249 + }, + { + "epoch": 0.21998435666797028, + "grad_norm": 5.525884824849455, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 218227724, + "step": 2250 + }, + { + "epoch": 0.21998435666797028, + "eval_seeclick_CIoU": 0.3011643961071968, + "eval_seeclick_GIoU": 0.30922264605760574, + "eval_seeclick_IoU": 0.38217996060848236, + "eval_seeclick_MAE_all": 0.11485552415251732, + "eval_seeclick_MAE_h": 0.059641074389219284, + "eval_seeclick_MAE_w": 0.16392672061920166, + "eval_seeclick_MAE_x": 0.18154437839984894, + "eval_seeclick_MAE_y": 0.054309917613863945, + "eval_seeclick_NUM_probability": 0.9999646842479706, + "eval_seeclick_inside_bbox": 0.5397727340459824, + "eval_seeclick_loss": 0.3544161319732666, + "eval_seeclick_loss_ce": 0.010198818985372782, + "eval_seeclick_loss_iou": 0.45037841796875, + "eval_seeclick_loss_num": 0.06540679931640625, + "eval_seeclick_loss_xval": 0.327178955078125, + "eval_seeclick_runtime": 75.4918, + "eval_seeclick_samples_per_second": 0.57, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 218227724, + "step": 2250 + }, + { + "epoch": 0.21998435666797028, + "eval_icons_CIoU": 0.7164222896099091, + "eval_icons_GIoU": 0.714885801076889, + "eval_icons_IoU": 0.7329641282558441, + "eval_icons_MAE_all": 0.03946614172309637, + "eval_icons_MAE_h": 0.05147015582770109, + "eval_icons_MAE_w": 0.0304076811298728, + "eval_icons_MAE_x": 0.024751904886215925, + "eval_icons_MAE_y": 0.051234821788966656, + "eval_icons_NUM_probability": 0.9994479119777679, + "eval_icons_inside_bbox": 0.9097222089767456, + "eval_icons_loss": 0.14135771989822388, + "eval_icons_loss_ce": 8.628145951661281e-05, + "eval_icons_loss_iou": 0.51702880859375, + "eval_icons_loss_num": 0.032337188720703125, + "eval_icons_loss_xval": 0.16156005859375, + "eval_icons_runtime": 93.8407, + "eval_icons_samples_per_second": 0.533, + "eval_icons_steps_per_second": 0.021, + "num_input_tokens_seen": 218227724, + "step": 2250 + }, + { + "epoch": 0.21998435666797028, + "eval_screenspot_CIoU": 0.21762444637715816, + "eval_screenspot_GIoU": 0.18478337675333023, + "eval_screenspot_IoU": 0.338673601547877, + "eval_screenspot_MAE_all": 0.1993624915679296, + "eval_screenspot_MAE_h": 0.15936951587597528, + "eval_screenspot_MAE_w": 0.2425732066233953, + "eval_screenspot_MAE_x": 0.23519697288672128, + "eval_screenspot_MAE_y": 0.1603102758526802, + "eval_screenspot_NUM_probability": 0.9998080929120382, + "eval_screenspot_inside_bbox": 0.5733333428700765, + "eval_screenspot_loss": 0.6789298057556152, + "eval_screenspot_loss_ce": 0.031316411991914116, + "eval_screenspot_loss_iou": 0.31689453125, + "eval_screenspot_loss_num": 0.13255818684895834, + "eval_screenspot_loss_xval": 0.6628824869791666, + "eval_screenspot_runtime": 145.7107, + "eval_screenspot_samples_per_second": 0.611, + "eval_screenspot_steps_per_second": 0.021, + "num_input_tokens_seen": 218227724, + "step": 2250 + }, + { + "epoch": 0.21998435666797028, + "eval_compot_CIoU": 0.425009623169899, + "eval_compot_GIoU": 0.39553608000278473, + "eval_compot_IoU": 0.495383620262146, + "eval_compot_MAE_all": 0.10695822164416313, + "eval_compot_MAE_h": 0.10729065351188183, + "eval_compot_MAE_w": 0.11371535807847977, + "eval_compot_MAE_x": 0.10158834978938103, + "eval_compot_MAE_y": 0.10523851029574871, + "eval_compot_NUM_probability": 0.9999545216560364, + "eval_compot_inside_bbox": 0.6684027910232544, + "eval_compot_loss": 0.33408114314079285, + "eval_compot_loss_ce": 0.019808197394013405, + "eval_compot_loss_iou": 0.478271484375, + "eval_compot_loss_num": 0.056640625, + "eval_compot_loss_xval": 0.28326416015625, + "eval_compot_runtime": 85.5204, + "eval_compot_samples_per_second": 0.585, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 218227724, + "step": 2250 + }, + { + "epoch": 0.21998435666797028, + "loss": 0.3275005519390106, + "loss_ce": 0.019639216363430023, + "loss_iou": 0.455078125, + "loss_num": 0.0615234375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 218227724, + "step": 2250 + }, + { + "epoch": 0.22008212749315603, + "grad_norm": 4.1865141765269325, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 218325048, + "step": 2251 + }, + { + "epoch": 0.22008212749315603, + "loss": 0.10265347361564636, + "loss_ce": 0.006431541405618191, + "loss_iou": 0.470703125, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 218325048, + "step": 2251 + }, + { + "epoch": 0.2201798983183418, + "grad_norm": 5.265357894885154, + "learning_rate": 5e-05, + "loss": 0.1169, + "num_input_tokens_seen": 218422400, + "step": 2252 + }, + { + "epoch": 0.2201798983183418, + "loss": 0.10658781975507736, + "loss_ce": 0.006703787948936224, + "loss_iou": 0.32421875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 218422400, + "step": 2252 + }, + { + "epoch": 0.22027766914352756, + "grad_norm": 3.4000692474318748, + "learning_rate": 5e-05, + "loss": 0.1312, + "num_input_tokens_seen": 218520040, + "step": 2253 + }, + { + "epoch": 0.22027766914352756, + "loss": 0.11027637124061584, + "loss_ce": 0.006821783725172281, + "loss_iou": 0.4375, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 218520040, + "step": 2253 + }, + { + "epoch": 0.22037543996871334, + "grad_norm": 7.828848355228458, + "learning_rate": 5e-05, + "loss": 0.1209, + "num_input_tokens_seen": 218616336, + "step": 2254 + }, + { + "epoch": 0.22037543996871334, + "loss": 0.17580483853816986, + "loss_ce": 0.009606100618839264, + "loss_iou": 0.33203125, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 218616336, + "step": 2254 + }, + { + "epoch": 0.2204732107938991, + "grad_norm": 8.758380274067985, + "learning_rate": 5e-05, + "loss": 0.0914, + "num_input_tokens_seen": 218713620, + "step": 2255 + }, + { + "epoch": 0.2204732107938991, + "loss": 0.10762462019920349, + "loss_ce": 0.004353140480816364, + "loss_iou": 0.357421875, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 218713620, + "step": 2255 + }, + { + "epoch": 0.22057098161908487, + "grad_norm": 3.1885600412698363, + "learning_rate": 5e-05, + "loss": 0.0821, + "num_input_tokens_seen": 218810560, + "step": 2256 + }, + { + "epoch": 0.22057098161908487, + "loss": 0.0846310704946518, + "loss_ce": 0.0026608542539179325, + "loss_iou": 0.49609375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 218810560, + "step": 2256 + }, + { + "epoch": 0.22066875244427062, + "grad_norm": 9.493505186869477, + "learning_rate": 5e-05, + "loss": 0.1005, + "num_input_tokens_seen": 218907584, + "step": 2257 + }, + { + "epoch": 0.22066875244427062, + "loss": 0.10400442779064178, + "loss_ce": 0.010086577385663986, + "loss_iou": 0.29296875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 218907584, + "step": 2257 + }, + { + "epoch": 0.2207665232694564, + "grad_norm": 5.589112359079938, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 219004088, + "step": 2258 + }, + { + "epoch": 0.2207665232694564, + "loss": 0.11020298302173615, + "loss_ce": 0.00540561368688941, + "loss_iou": 0.275390625, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 219004088, + "step": 2258 + }, + { + "epoch": 0.22086429409464217, + "grad_norm": 32.207518348751194, + "learning_rate": 5e-05, + "loss": 0.1258, + "num_input_tokens_seen": 219101348, + "step": 2259 + }, + { + "epoch": 0.22086429409464217, + "loss": 0.14980584383010864, + "loss_ce": 0.0054882122203707695, + "loss_iou": 0.359375, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 219101348, + "step": 2259 + }, + { + "epoch": 0.22096206491982792, + "grad_norm": 4.354106101851172, + "learning_rate": 5e-05, + "loss": 0.1085, + "num_input_tokens_seen": 219198664, + "step": 2260 + }, + { + "epoch": 0.22096206491982792, + "loss": 0.10746055841445923, + "loss_ce": 0.006302421446889639, + "loss_iou": 0.330078125, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 219198664, + "step": 2260 + }, + { + "epoch": 0.2210598357450137, + "grad_norm": 55.743306829563764, + "learning_rate": 5e-05, + "loss": 0.1448, + "num_input_tokens_seen": 219295596, + "step": 2261 + }, + { + "epoch": 0.2210598357450137, + "loss": 0.15387029945850372, + "loss_ce": 0.0016180959064513445, + "loss_iou": 0.427734375, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 219295596, + "step": 2261 + }, + { + "epoch": 0.22115760657019945, + "grad_norm": 12.648422345823853, + "learning_rate": 5e-05, + "loss": 0.1323, + "num_input_tokens_seen": 219392104, + "step": 2262 + }, + { + "epoch": 0.22115760657019945, + "loss": 0.11983872205018997, + "loss_ce": 0.00655747763812542, + "loss_iou": 0.390625, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 219392104, + "step": 2262 + }, + { + "epoch": 0.22125537739538523, + "grad_norm": 5.787947272982945, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 219489320, + "step": 2263 + }, + { + "epoch": 0.22125537739538523, + "loss": 0.1185593530535698, + "loss_ce": 0.004759308882057667, + "loss_iou": 0.451171875, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 219489320, + "step": 2263 + }, + { + "epoch": 0.22135314822057098, + "grad_norm": 6.288192986489032, + "learning_rate": 5e-05, + "loss": 0.1124, + "num_input_tokens_seen": 219586480, + "step": 2264 + }, + { + "epoch": 0.22135314822057098, + "loss": 0.1225733533501625, + "loss_ce": 0.007514449767768383, + "loss_iou": 0.359375, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 219586480, + "step": 2264 + }, + { + "epoch": 0.22145091904575676, + "grad_norm": 5.177642765406564, + "learning_rate": 5e-05, + "loss": 0.1059, + "num_input_tokens_seen": 219684508, + "step": 2265 + }, + { + "epoch": 0.22145091904575676, + "loss": 0.092185378074646, + "loss_ce": 0.007132884580641985, + "loss_iou": 0.400390625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 219684508, + "step": 2265 + }, + { + "epoch": 0.2215486898709425, + "grad_norm": 5.643055066834393, + "learning_rate": 5e-05, + "loss": 0.0975, + "num_input_tokens_seen": 219781280, + "step": 2266 + }, + { + "epoch": 0.2215486898709425, + "loss": 0.1068890392780304, + "loss_ce": 0.0030529722571372986, + "loss_iou": 0.318359375, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 219781280, + "step": 2266 + }, + { + "epoch": 0.22164646069612828, + "grad_norm": 3.86909830048507, + "learning_rate": 5e-05, + "loss": 0.1077, + "num_input_tokens_seen": 219877952, + "step": 2267 + }, + { + "epoch": 0.22164646069612828, + "loss": 0.09247620403766632, + "loss_ce": 0.006660780869424343, + "loss_iou": 0.353515625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 219877952, + "step": 2267 + }, + { + "epoch": 0.22174423152131403, + "grad_norm": 3.6519414006946267, + "learning_rate": 5e-05, + "loss": 0.0954, + "num_input_tokens_seen": 219974092, + "step": 2268 + }, + { + "epoch": 0.22174423152131403, + "loss": 0.0857079029083252, + "loss_ce": 0.009841198101639748, + "loss_iou": 0.2421875, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 219974092, + "step": 2268 + }, + { + "epoch": 0.2218420023464998, + "grad_norm": 5.2402476330901, + "learning_rate": 5e-05, + "loss": 0.1217, + "num_input_tokens_seen": 220071100, + "step": 2269 + }, + { + "epoch": 0.2218420023464998, + "loss": 0.12150461226701736, + "loss_ce": 0.009352508932352066, + "loss_iou": 0.390625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 220071100, + "step": 2269 + }, + { + "epoch": 0.22193977317168556, + "grad_norm": 2.60398377655469, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 220166348, + "step": 2270 + }, + { + "epoch": 0.22193977317168556, + "loss": 0.0743313580751419, + "loss_ce": 0.005041200201958418, + "loss_iou": 0.326171875, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 220166348, + "step": 2270 + }, + { + "epoch": 0.22203754399687134, + "grad_norm": 4.5909712275109325, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 220263020, + "step": 2271 + }, + { + "epoch": 0.22203754399687134, + "loss": 0.0756484866142273, + "loss_ce": 0.002360518788918853, + "loss_iou": 0.392578125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 220263020, + "step": 2271 + }, + { + "epoch": 0.2221353148220571, + "grad_norm": 7.450406532068531, + "learning_rate": 5e-05, + "loss": 0.0812, + "num_input_tokens_seen": 220359148, + "step": 2272 + }, + { + "epoch": 0.2221353148220571, + "loss": 0.08813443034887314, + "loss_ce": 0.002074854914098978, + "loss_iou": 0.388671875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 220359148, + "step": 2272 + }, + { + "epoch": 0.22223308564724287, + "grad_norm": 13.941707104976778, + "learning_rate": 5e-05, + "loss": 0.1107, + "num_input_tokens_seen": 220456628, + "step": 2273 + }, + { + "epoch": 0.22223308564724287, + "loss": 0.12878306210041046, + "loss_ce": 0.009459332562983036, + "loss_iou": 0.375, + "loss_num": 0.02392578125, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 220456628, + "step": 2273 + }, + { + "epoch": 0.22233085647242862, + "grad_norm": 10.541887489210227, + "learning_rate": 5e-05, + "loss": 0.0982, + "num_input_tokens_seen": 220553720, + "step": 2274 + }, + { + "epoch": 0.22233085647242862, + "loss": 0.09569094330072403, + "loss_ce": 0.004290792625397444, + "loss_iou": 0.39453125, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 220553720, + "step": 2274 + }, + { + "epoch": 0.2224286272976144, + "grad_norm": 13.70908773152742, + "learning_rate": 5e-05, + "loss": 0.0817, + "num_input_tokens_seen": 220651480, + "step": 2275 + }, + { + "epoch": 0.2224286272976144, + "loss": 0.05723199248313904, + "loss_ce": 0.003795713186264038, + "loss_iou": 0.435546875, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 220651480, + "step": 2275 + }, + { + "epoch": 0.22252639812280015, + "grad_norm": 8.118449423613253, + "learning_rate": 5e-05, + "loss": 0.0999, + "num_input_tokens_seen": 220750328, + "step": 2276 + }, + { + "epoch": 0.22252639812280015, + "loss": 0.1335963010787964, + "loss_ce": 0.0060328226536512375, + "loss_iou": 0.396484375, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 220750328, + "step": 2276 + }, + { + "epoch": 0.22262416894798592, + "grad_norm": 7.4298081057854555, + "learning_rate": 5e-05, + "loss": 0.1143, + "num_input_tokens_seen": 220848128, + "step": 2277 + }, + { + "epoch": 0.22262416894798592, + "loss": 0.1256934255361557, + "loss_ce": 0.007041086442768574, + "loss_iou": 0.5, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 220848128, + "step": 2277 + }, + { + "epoch": 0.22272193977317167, + "grad_norm": 9.541811104600844, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 220945440, + "step": 2278 + }, + { + "epoch": 0.22272193977317167, + "loss": 0.09763635694980621, + "loss_ce": 0.00312341982498765, + "loss_iou": 0.337890625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 220945440, + "step": 2278 + }, + { + "epoch": 0.22281971059835745, + "grad_norm": 10.492574299037315, + "learning_rate": 5e-05, + "loss": 0.103, + "num_input_tokens_seen": 221041100, + "step": 2279 + }, + { + "epoch": 0.22281971059835745, + "loss": 0.10220812261104584, + "loss_ce": 0.009938230738043785, + "loss_iou": 0.240234375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 221041100, + "step": 2279 + }, + { + "epoch": 0.2229174814235432, + "grad_norm": 2.9072405651110547, + "learning_rate": 5e-05, + "loss": 0.1001, + "num_input_tokens_seen": 221137428, + "step": 2280 + }, + { + "epoch": 0.2229174814235432, + "loss": 0.08486871421337128, + "loss_ce": 0.003951354417949915, + "loss_iou": 0.328125, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 221137428, + "step": 2280 + }, + { + "epoch": 0.22301525224872898, + "grad_norm": 14.020236037556234, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 221234484, + "step": 2281 + }, + { + "epoch": 0.22301525224872898, + "loss": 0.05240469425916672, + "loss_ce": 0.0022032796405255795, + "loss_iou": 0.36328125, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 221234484, + "step": 2281 + }, + { + "epoch": 0.22311302307391476, + "grad_norm": 21.48573622269713, + "learning_rate": 5e-05, + "loss": 0.0996, + "num_input_tokens_seen": 221331052, + "step": 2282 + }, + { + "epoch": 0.22311302307391476, + "loss": 0.13753215968608856, + "loss_ce": 0.0036667962558567524, + "loss_iou": 0.220703125, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 221331052, + "step": 2282 + }, + { + "epoch": 0.2232107938991005, + "grad_norm": 3.6850739436075806, + "learning_rate": 5e-05, + "loss": 0.0985, + "num_input_tokens_seen": 221427028, + "step": 2283 + }, + { + "epoch": 0.2232107938991005, + "loss": 0.0892786979675293, + "loss_ce": 0.005668163299560547, + "loss_iou": 0.27734375, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 221427028, + "step": 2283 + }, + { + "epoch": 0.22330856472428628, + "grad_norm": 2.3683737679819576, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 221523488, + "step": 2284 + }, + { + "epoch": 0.22330856472428628, + "loss": 0.09255215525627136, + "loss_ce": 0.013603180646896362, + "loss_iou": 0.26953125, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 221523488, + "step": 2284 + }, + { + "epoch": 0.22340633554947203, + "grad_norm": 3.6689616720697584, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 221619460, + "step": 2285 + }, + { + "epoch": 0.22340633554947203, + "loss": 0.04935155063867569, + "loss_ce": 0.0073135895654559135, + "loss_iou": 0.3828125, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 221619460, + "step": 2285 + }, + { + "epoch": 0.2235041063746578, + "grad_norm": 3.3294186052336987, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 221717904, + "step": 2286 + }, + { + "epoch": 0.2235041063746578, + "loss": 0.06845072656869888, + "loss_ce": 0.0022428433876484632, + "loss_iou": 0.388671875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 221717904, + "step": 2286 + }, + { + "epoch": 0.22360187719984356, + "grad_norm": 5.882281142815313, + "learning_rate": 5e-05, + "loss": 0.0504, + "num_input_tokens_seen": 221814924, + "step": 2287 + }, + { + "epoch": 0.22360187719984356, + "loss": 0.061539165675640106, + "loss_ce": 0.003479471430182457, + "loss_iou": 0.330078125, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 221814924, + "step": 2287 + }, + { + "epoch": 0.22369964802502934, + "grad_norm": 11.264856717085948, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 221912316, + "step": 2288 + }, + { + "epoch": 0.22369964802502934, + "loss": 0.07925570011138916, + "loss_ce": 0.008805867284536362, + "loss_iou": 0.365234375, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 221912316, + "step": 2288 + }, + { + "epoch": 0.2237974188502151, + "grad_norm": 28.005296849302162, + "learning_rate": 5e-05, + "loss": 0.1872, + "num_input_tokens_seen": 222008632, + "step": 2289 + }, + { + "epoch": 0.2237974188502151, + "loss": 0.19746015965938568, + "loss_ce": 0.012340539135038853, + "loss_iou": 0.36328125, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 222008632, + "step": 2289 + }, + { + "epoch": 0.22389518967540087, + "grad_norm": 14.351784942860387, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 222105352, + "step": 2290 + }, + { + "epoch": 0.22389518967540087, + "loss": 0.1294604241847992, + "loss_ce": 0.004368856083601713, + "loss_iou": 0.404296875, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 222105352, + "step": 2290 + }, + { + "epoch": 0.22399296050058662, + "grad_norm": 15.09541973272145, + "learning_rate": 5e-05, + "loss": 0.0956, + "num_input_tokens_seen": 222202708, + "step": 2291 + }, + { + "epoch": 0.22399296050058662, + "loss": 0.08132436126470566, + "loss_ce": 0.002070212736725807, + "loss_iou": 0.35546875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 222202708, + "step": 2291 + }, + { + "epoch": 0.2240907313257724, + "grad_norm": 15.011519745032142, + "learning_rate": 5e-05, + "loss": 0.1099, + "num_input_tokens_seen": 222299040, + "step": 2292 + }, + { + "epoch": 0.2240907313257724, + "loss": 0.1235707625746727, + "loss_ce": 0.004245124291628599, + "loss_iou": 0.267578125, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 222299040, + "step": 2292 + }, + { + "epoch": 0.22418850215095815, + "grad_norm": 14.44660547699063, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 222395500, + "step": 2293 + }, + { + "epoch": 0.22418850215095815, + "loss": 0.07411829382181168, + "loss_ce": 0.002875005593523383, + "loss_iou": 0.333984375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 222395500, + "step": 2293 + }, + { + "epoch": 0.22428627297614392, + "grad_norm": 3.570493785012438, + "learning_rate": 5e-05, + "loss": 0.1266, + "num_input_tokens_seen": 222492640, + "step": 2294 + }, + { + "epoch": 0.22428627297614392, + "loss": 0.1044248640537262, + "loss_ce": 0.011208923533558846, + "loss_iou": 0.18359375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 222492640, + "step": 2294 + }, + { + "epoch": 0.22438404380132967, + "grad_norm": 48.805774595882276, + "learning_rate": 5e-05, + "loss": 0.1456, + "num_input_tokens_seen": 222589156, + "step": 2295 + }, + { + "epoch": 0.22438404380132967, + "loss": 0.10071903467178345, + "loss_ce": 0.0036426172591745853, + "loss_iou": 0.31640625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 222589156, + "step": 2295 + }, + { + "epoch": 0.22448181462651545, + "grad_norm": 79.16936620688651, + "learning_rate": 5e-05, + "loss": 0.1051, + "num_input_tokens_seen": 222686340, + "step": 2296 + }, + { + "epoch": 0.22448181462651545, + "loss": 0.10583722591400146, + "loss_ce": 0.00412213709205389, + "loss_iou": 0.43359375, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 222686340, + "step": 2296 + }, + { + "epoch": 0.2245795854517012, + "grad_norm": 14.70027831984325, + "learning_rate": 5e-05, + "loss": 0.0981, + "num_input_tokens_seen": 222783580, + "step": 2297 + }, + { + "epoch": 0.2245795854517012, + "loss": 0.10866699367761612, + "loss_ce": 0.00411376915872097, + "loss_iou": 0.376953125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 222783580, + "step": 2297 + }, + { + "epoch": 0.22467735627688698, + "grad_norm": 2.5841590839454778, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 222880108, + "step": 2298 + }, + { + "epoch": 0.22467735627688698, + "loss": 0.0729021430015564, + "loss_ce": 0.009532393887639046, + "loss_iou": 0.306640625, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 222880108, + "step": 2298 + }, + { + "epoch": 0.22477512710207273, + "grad_norm": 12.37733225752328, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 222977232, + "step": 2299 + }, + { + "epoch": 0.22477512710207273, + "loss": 0.09944003075361252, + "loss_ce": 0.01142161525785923, + "loss_iou": 0.3515625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 222977232, + "step": 2299 + }, + { + "epoch": 0.2248728979272585, + "grad_norm": 1.7106863573369795, + "learning_rate": 5e-05, + "loss": 0.0851, + "num_input_tokens_seen": 223075036, + "step": 2300 + }, + { + "epoch": 0.2248728979272585, + "loss": 0.1084519624710083, + "loss_ce": 0.005821343511343002, + "loss_iou": 0.298828125, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 223075036, + "step": 2300 + }, + { + "epoch": 0.22497066875244426, + "grad_norm": 4.347672861638501, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 223172436, + "step": 2301 + }, + { + "epoch": 0.22497066875244426, + "loss": 0.05395805090665817, + "loss_ce": 0.0047103059478104115, + "loss_iou": 0.341796875, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 223172436, + "step": 2301 + }, + { + "epoch": 0.22506843957763004, + "grad_norm": 5.7589654658214835, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 223268948, + "step": 2302 + }, + { + "epoch": 0.22506843957763004, + "loss": 0.08977419137954712, + "loss_ce": 0.0036612160038203, + "loss_iou": 0.291015625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 223268948, + "step": 2302 + }, + { + "epoch": 0.2251662104028158, + "grad_norm": 8.254430548440729, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 223366180, + "step": 2303 + }, + { + "epoch": 0.2251662104028158, + "loss": 0.06137808412313461, + "loss_ce": 0.008651342242956161, + "loss_iou": 0.2333984375, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 223366180, + "step": 2303 + }, + { + "epoch": 0.22526398122800156, + "grad_norm": 7.804084434909382, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 223463256, + "step": 2304 + }, + { + "epoch": 0.22526398122800156, + "loss": 0.08405411243438721, + "loss_ce": 0.003518222365528345, + "loss_iou": 0.29296875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 223463256, + "step": 2304 + }, + { + "epoch": 0.22536175205318734, + "grad_norm": 2.0677791513380557, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 223559124, + "step": 2305 + }, + { + "epoch": 0.22536175205318734, + "loss": 0.09791877120733261, + "loss_ce": 0.011065739206969738, + "loss_iou": 0.353515625, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 223559124, + "step": 2305 + }, + { + "epoch": 0.2254595228783731, + "grad_norm": 8.014439527806838, + "learning_rate": 5e-05, + "loss": 0.0549, + "num_input_tokens_seen": 223656724, + "step": 2306 + }, + { + "epoch": 0.2254595228783731, + "loss": 0.06235479936003685, + "loss_ce": 0.0037915650755167007, + "loss_iou": 0.34765625, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 223656724, + "step": 2306 + }, + { + "epoch": 0.22555729370355887, + "grad_norm": 3.55000708167653, + "learning_rate": 5e-05, + "loss": 0.104, + "num_input_tokens_seen": 223753988, + "step": 2307 + }, + { + "epoch": 0.22555729370355887, + "loss": 0.12392376363277435, + "loss_ce": 0.015265925787389278, + "loss_iou": 0.359375, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 223753988, + "step": 2307 + }, + { + "epoch": 0.22565506452874462, + "grad_norm": 6.06467107346341, + "learning_rate": 5e-05, + "loss": 0.1011, + "num_input_tokens_seen": 223851088, + "step": 2308 + }, + { + "epoch": 0.22565506452874462, + "loss": 0.13253095746040344, + "loss_ce": 0.006859573069959879, + "loss_iou": 0.462890625, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 223851088, + "step": 2308 + }, + { + "epoch": 0.2257528353539304, + "grad_norm": 6.276344707791061, + "learning_rate": 5e-05, + "loss": 0.1057, + "num_input_tokens_seen": 223948896, + "step": 2309 + }, + { + "epoch": 0.2257528353539304, + "loss": 0.1153334230184555, + "loss_ce": 0.011802537366747856, + "loss_iou": 0.3125, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 223948896, + "step": 2309 + }, + { + "epoch": 0.22585060617911615, + "grad_norm": 4.11342133130279, + "learning_rate": 5e-05, + "loss": 0.0861, + "num_input_tokens_seen": 224046636, + "step": 2310 + }, + { + "epoch": 0.22585060617911615, + "loss": 0.09521406143903732, + "loss_ce": 0.0040733120404183865, + "loss_iou": 0.28125, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 224046636, + "step": 2310 + }, + { + "epoch": 0.22594837700430193, + "grad_norm": 8.856496160917981, + "learning_rate": 5e-05, + "loss": 0.1239, + "num_input_tokens_seen": 224144336, + "step": 2311 + }, + { + "epoch": 0.22594837700430193, + "loss": 0.12514075636863708, + "loss_ce": 0.003528207540512085, + "loss_iou": 0.384765625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 224144336, + "step": 2311 + }, + { + "epoch": 0.22604614782948768, + "grad_norm": 4.955704505491842, + "learning_rate": 5e-05, + "loss": 0.0966, + "num_input_tokens_seen": 224240964, + "step": 2312 + }, + { + "epoch": 0.22604614782948768, + "loss": 0.07759897410869598, + "loss_ce": 0.01017038431018591, + "loss_iou": 0.2470703125, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 224240964, + "step": 2312 + }, + { + "epoch": 0.22614391865467345, + "grad_norm": 4.070401794073503, + "learning_rate": 5e-05, + "loss": 0.0969, + "num_input_tokens_seen": 224338324, + "step": 2313 + }, + { + "epoch": 0.22614391865467345, + "loss": 0.06743178516626358, + "loss_ce": 0.0062440382316708565, + "loss_iou": 0.37890625, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 224338324, + "step": 2313 + }, + { + "epoch": 0.2262416894798592, + "grad_norm": 3.75804373431691, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 224435216, + "step": 2314 + }, + { + "epoch": 0.2262416894798592, + "loss": 0.10023649781942368, + "loss_ce": 0.008714279159903526, + "loss_iou": 0.3515625, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 224435216, + "step": 2314 + }, + { + "epoch": 0.22633946030504498, + "grad_norm": 17.615885658866162, + "learning_rate": 5e-05, + "loss": 0.1258, + "num_input_tokens_seen": 224531652, + "step": 2315 + }, + { + "epoch": 0.22633946030504498, + "loss": 0.12642037868499756, + "loss_ce": 0.008988742716610432, + "loss_iou": 0.38671875, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 224531652, + "step": 2315 + }, + { + "epoch": 0.22643723113023073, + "grad_norm": 7.412397861366831, + "learning_rate": 5e-05, + "loss": 0.1174, + "num_input_tokens_seen": 224627960, + "step": 2316 + }, + { + "epoch": 0.22643723113023073, + "loss": 0.1295715719461441, + "loss_ce": 0.005456588231027126, + "loss_iou": 0.3515625, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 224627960, + "step": 2316 + }, + { + "epoch": 0.2265350019554165, + "grad_norm": 12.253885703121309, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 224724020, + "step": 2317 + }, + { + "epoch": 0.2265350019554165, + "loss": 0.07814504206180573, + "loss_ce": 0.007542626932263374, + "loss_iou": 0.294921875, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 224724020, + "step": 2317 + }, + { + "epoch": 0.22663277278060226, + "grad_norm": 4.667117372310034, + "learning_rate": 5e-05, + "loss": 0.1028, + "num_input_tokens_seen": 224820300, + "step": 2318 + }, + { + "epoch": 0.22663277278060226, + "loss": 0.10436356067657471, + "loss_ce": 0.007241373881697655, + "loss_iou": 0.33984375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 224820300, + "step": 2318 + }, + { + "epoch": 0.22673054360578804, + "grad_norm": 4.927148947150611, + "learning_rate": 5e-05, + "loss": 0.1077, + "num_input_tokens_seen": 224917260, + "step": 2319 + }, + { + "epoch": 0.22673054360578804, + "loss": 0.09970904141664505, + "loss_ce": 0.005547049455344677, + "loss_iou": 0.3671875, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 224917260, + "step": 2319 + }, + { + "epoch": 0.2268283144309738, + "grad_norm": 5.670695658350034, + "learning_rate": 5e-05, + "loss": 0.1026, + "num_input_tokens_seen": 225014272, + "step": 2320 + }, + { + "epoch": 0.2268283144309738, + "loss": 0.09933561086654663, + "loss_ce": 0.005524568259716034, + "loss_iou": 0.28515625, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 225014272, + "step": 2320 + }, + { + "epoch": 0.22692608525615957, + "grad_norm": 7.397457473329953, + "learning_rate": 5e-05, + "loss": 0.1256, + "num_input_tokens_seen": 225111296, + "step": 2321 + }, + { + "epoch": 0.22692608525615957, + "loss": 0.15628774464130402, + "loss_ce": 0.007239889819175005, + "loss_iou": 0.296875, + "loss_num": 0.02978515625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 225111296, + "step": 2321 + }, + { + "epoch": 0.22702385608134532, + "grad_norm": 8.477058512568624, + "learning_rate": 5e-05, + "loss": 0.096, + "num_input_tokens_seen": 225208296, + "step": 2322 + }, + { + "epoch": 0.22702385608134532, + "loss": 0.14460614323616028, + "loss_ce": 0.0046830447390675545, + "loss_iou": 0.375, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 225208296, + "step": 2322 + }, + { + "epoch": 0.2271216269065311, + "grad_norm": 3.8730687662097356, + "learning_rate": 5e-05, + "loss": 0.0978, + "num_input_tokens_seen": 225304908, + "step": 2323 + }, + { + "epoch": 0.2271216269065311, + "loss": 0.0790291279554367, + "loss_ce": 0.0017281024483963847, + "loss_iou": 0.314453125, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 225304908, + "step": 2323 + }, + { + "epoch": 0.22721939773171684, + "grad_norm": 8.837136946289306, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 225401796, + "step": 2324 + }, + { + "epoch": 0.22721939773171684, + "loss": 0.089271679520607, + "loss_ce": 0.005668772384524345, + "loss_iou": 0.279296875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 225401796, + "step": 2324 + }, + { + "epoch": 0.22731716855690262, + "grad_norm": 9.939184811562821, + "learning_rate": 5e-05, + "loss": 0.1002, + "num_input_tokens_seen": 225499388, + "step": 2325 + }, + { + "epoch": 0.22731716855690262, + "loss": 0.07374119758605957, + "loss_ce": 0.005595450755208731, + "loss_iou": 0.40234375, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 225499388, + "step": 2325 + }, + { + "epoch": 0.22741493938208837, + "grad_norm": 10.996926943734609, + "learning_rate": 5e-05, + "loss": 0.1402, + "num_input_tokens_seen": 225595688, + "step": 2326 + }, + { + "epoch": 0.22741493938208837, + "loss": 0.1313370168209076, + "loss_ce": 0.004322856664657593, + "loss_iou": 0.2470703125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 225595688, + "step": 2326 + }, + { + "epoch": 0.22751271020727415, + "grad_norm": 8.348102508211388, + "learning_rate": 5e-05, + "loss": 0.1253, + "num_input_tokens_seen": 225692348, + "step": 2327 + }, + { + "epoch": 0.22751271020727415, + "loss": 0.12277964502573013, + "loss_ce": 0.008094590157270432, + "loss_iou": 0.220703125, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 225692348, + "step": 2327 + }, + { + "epoch": 0.22761048103245993, + "grad_norm": 7.082583284763931, + "learning_rate": 5e-05, + "loss": 0.1374, + "num_input_tokens_seen": 225789492, + "step": 2328 + }, + { + "epoch": 0.22761048103245993, + "loss": 0.17862388491630554, + "loss_ce": 0.012413708493113518, + "loss_iou": 0.29296875, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 225789492, + "step": 2328 + }, + { + "epoch": 0.22770825185764568, + "grad_norm": 11.099824630071533, + "learning_rate": 5e-05, + "loss": 0.1035, + "num_input_tokens_seen": 225886488, + "step": 2329 + }, + { + "epoch": 0.22770825185764568, + "loss": 0.10868841409683228, + "loss_ce": 0.004348812624812126, + "loss_iou": 0.443359375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 225886488, + "step": 2329 + }, + { + "epoch": 0.22780602268283145, + "grad_norm": 15.35781482069192, + "learning_rate": 5e-05, + "loss": 0.1229, + "num_input_tokens_seen": 225983216, + "step": 2330 + }, + { + "epoch": 0.22780602268283145, + "loss": 0.107576385140419, + "loss_ce": 0.008852013386785984, + "loss_iou": 0.27734375, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 225983216, + "step": 2330 + }, + { + "epoch": 0.2279037935080172, + "grad_norm": 11.973443285823567, + "learning_rate": 5e-05, + "loss": 0.1111, + "num_input_tokens_seen": 226080292, + "step": 2331 + }, + { + "epoch": 0.2279037935080172, + "loss": 0.13308028876781464, + "loss_ce": 0.007012170739471912, + "loss_iou": 0.271484375, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 226080292, + "step": 2331 + }, + { + "epoch": 0.22800156433320298, + "grad_norm": 9.104055851862238, + "learning_rate": 5e-05, + "loss": 0.0463, + "num_input_tokens_seen": 226177148, + "step": 2332 + }, + { + "epoch": 0.22800156433320298, + "loss": 0.04693690687417984, + "loss_ce": 0.006623184308409691, + "loss_iou": 0.29296875, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 226177148, + "step": 2332 + }, + { + "epoch": 0.22809933515838873, + "grad_norm": 13.49224800728828, + "learning_rate": 5e-05, + "loss": 0.0655, + "num_input_tokens_seen": 226274292, + "step": 2333 + }, + { + "epoch": 0.22809933515838873, + "loss": 0.07413147389888763, + "loss_ce": 0.0031781047582626343, + "loss_iou": 0.3984375, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 226274292, + "step": 2333 + }, + { + "epoch": 0.2281971059835745, + "grad_norm": 10.32077824612206, + "learning_rate": 5e-05, + "loss": 0.1061, + "num_input_tokens_seen": 226371300, + "step": 2334 + }, + { + "epoch": 0.2281971059835745, + "loss": 0.08803737908601761, + "loss_ce": 0.0075930459424853325, + "loss_iou": 0.333984375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 226371300, + "step": 2334 + }, + { + "epoch": 0.22829487680876026, + "grad_norm": 8.049101330004046, + "learning_rate": 5e-05, + "loss": 0.1281, + "num_input_tokens_seen": 226469344, + "step": 2335 + }, + { + "epoch": 0.22829487680876026, + "loss": 0.14418423175811768, + "loss_ce": 0.004108539782464504, + "loss_iou": 0.40625, + "loss_num": 0.028076171875, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 226469344, + "step": 2335 + }, + { + "epoch": 0.22839264763394604, + "grad_norm": 6.613892033496259, + "learning_rate": 5e-05, + "loss": 0.1463, + "num_input_tokens_seen": 226566556, + "step": 2336 + }, + { + "epoch": 0.22839264763394604, + "loss": 0.12830856442451477, + "loss_ce": 0.0024769569281488657, + "loss_iou": 0.36328125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 226566556, + "step": 2336 + }, + { + "epoch": 0.2284904184591318, + "grad_norm": 12.657281244093683, + "learning_rate": 5e-05, + "loss": 0.1102, + "num_input_tokens_seen": 226663284, + "step": 2337 + }, + { + "epoch": 0.2284904184591318, + "loss": 0.12215482443571091, + "loss_ce": 0.0060964785516262054, + "loss_iou": 0.3046875, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 226663284, + "step": 2337 + }, + { + "epoch": 0.22858818928431757, + "grad_norm": 16.24533267148622, + "learning_rate": 5e-05, + "loss": 0.1096, + "num_input_tokens_seen": 226759608, + "step": 2338 + }, + { + "epoch": 0.22858818928431757, + "loss": 0.11245866864919662, + "loss_ce": 0.009805159643292427, + "loss_iou": 0.1650390625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 226759608, + "step": 2338 + }, + { + "epoch": 0.22868596010950332, + "grad_norm": 10.722553996291667, + "learning_rate": 5e-05, + "loss": 0.0966, + "num_input_tokens_seen": 226856172, + "step": 2339 + }, + { + "epoch": 0.22868596010950332, + "loss": 0.11244438588619232, + "loss_ce": 0.007242661900818348, + "loss_iou": 0.416015625, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 226856172, + "step": 2339 + }, + { + "epoch": 0.2287837309346891, + "grad_norm": 10.695721969708877, + "learning_rate": 5e-05, + "loss": 0.1079, + "num_input_tokens_seen": 226954148, + "step": 2340 + }, + { + "epoch": 0.2287837309346891, + "loss": 0.10147415101528168, + "loss_ce": 0.0068391356617212296, + "loss_iou": 0.31640625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 226954148, + "step": 2340 + }, + { + "epoch": 0.22888150175987484, + "grad_norm": 11.955193992052552, + "learning_rate": 5e-05, + "loss": 0.0539, + "num_input_tokens_seen": 227050168, + "step": 2341 + }, + { + "epoch": 0.22888150175987484, + "loss": 0.06168867647647858, + "loss_ce": 0.004369039088487625, + "loss_iou": 0.2265625, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 227050168, + "step": 2341 + }, + { + "epoch": 0.22897927258506062, + "grad_norm": 15.3879691558898, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 227147336, + "step": 2342 + }, + { + "epoch": 0.22897927258506062, + "loss": 0.06214195117354393, + "loss_ce": 0.004707867279648781, + "loss_iou": 0.35546875, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 227147336, + "step": 2342 + }, + { + "epoch": 0.22907704341024637, + "grad_norm": 20.833468651361834, + "learning_rate": 5e-05, + "loss": 0.1278, + "num_input_tokens_seen": 227245744, + "step": 2343 + }, + { + "epoch": 0.22907704341024637, + "loss": 0.1351867914199829, + "loss_ce": 0.0070740096271038055, + "loss_iou": 0.43359375, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 227245744, + "step": 2343 + }, + { + "epoch": 0.22917481423543215, + "grad_norm": 10.06475088086968, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 227342552, + "step": 2344 + }, + { + "epoch": 0.22917481423543215, + "loss": 0.08818650245666504, + "loss_ce": 0.004873515106737614, + "loss_iou": 0.423828125, + "loss_num": 0.0166015625, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 227342552, + "step": 2344 + }, + { + "epoch": 0.2292725850606179, + "grad_norm": 13.75409230295372, + "learning_rate": 5e-05, + "loss": 0.0944, + "num_input_tokens_seen": 227439324, + "step": 2345 + }, + { + "epoch": 0.2292725850606179, + "loss": 0.08149658888578415, + "loss_ce": 0.0051263486966490746, + "loss_iou": 0.392578125, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 227439324, + "step": 2345 + }, + { + "epoch": 0.22937035588580368, + "grad_norm": 12.973256290198016, + "learning_rate": 5e-05, + "loss": 0.1027, + "num_input_tokens_seen": 227535416, + "step": 2346 + }, + { + "epoch": 0.22937035588580368, + "loss": 0.09935629367828369, + "loss_ce": 0.0045000300742685795, + "loss_iou": 0.275390625, + "loss_num": 0.01904296875, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 227535416, + "step": 2346 + }, + { + "epoch": 0.22946812671098943, + "grad_norm": 3.7669635322052883, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 227631832, + "step": 2347 + }, + { + "epoch": 0.22946812671098943, + "loss": 0.12254415452480316, + "loss_ce": 0.009919027797877789, + "loss_iou": 0.23046875, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 227631832, + "step": 2347 + }, + { + "epoch": 0.2295658975361752, + "grad_norm": 4.705674661953144, + "learning_rate": 5e-05, + "loss": 0.0896, + "num_input_tokens_seen": 227729672, + "step": 2348 + }, + { + "epoch": 0.2295658975361752, + "loss": 0.08650890737771988, + "loss_ce": 0.006858029402792454, + "loss_iou": 0.33203125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 227729672, + "step": 2348 + }, + { + "epoch": 0.22966366836136096, + "grad_norm": 15.276077151355214, + "learning_rate": 5e-05, + "loss": 0.103, + "num_input_tokens_seen": 227825608, + "step": 2349 + }, + { + "epoch": 0.22966366836136096, + "loss": 0.11672569066286087, + "loss_ce": 0.003574141301214695, + "loss_iou": 0.294921875, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 227825608, + "step": 2349 + }, + { + "epoch": 0.22976143918654673, + "grad_norm": 15.96284634142027, + "learning_rate": 5e-05, + "loss": 0.0993, + "num_input_tokens_seen": 227922084, + "step": 2350 + }, + { + "epoch": 0.22976143918654673, + "loss": 0.10060260444879532, + "loss_ce": 0.008088568225502968, + "loss_iou": 0.3046875, + "loss_num": 0.0185546875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 227922084, + "step": 2350 + }, + { + "epoch": 0.2298592100117325, + "grad_norm": 7.126940454037086, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 228019692, + "step": 2351 + }, + { + "epoch": 0.2298592100117325, + "loss": 0.07530754059553146, + "loss_ce": 0.006475141271948814, + "loss_iou": 0.333984375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 228019692, + "step": 2351 + }, + { + "epoch": 0.22995698083691826, + "grad_norm": 14.56652360248507, + "learning_rate": 5e-05, + "loss": 0.1307, + "num_input_tokens_seen": 228117172, + "step": 2352 + }, + { + "epoch": 0.22995698083691826, + "loss": 0.13168001174926758, + "loss_ce": 0.007046227343380451, + "loss_iou": 0.298828125, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 228117172, + "step": 2352 + }, + { + "epoch": 0.23005475166210404, + "grad_norm": 7.58196983939869, + "learning_rate": 5e-05, + "loss": 0.121, + "num_input_tokens_seen": 228214216, + "step": 2353 + }, + { + "epoch": 0.23005475166210404, + "loss": 0.12550011277198792, + "loss_ce": 0.008312612771987915, + "loss_iou": 0.384765625, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 228214216, + "step": 2353 + }, + { + "epoch": 0.2301525224872898, + "grad_norm": 11.797862221477441, + "learning_rate": 5e-05, + "loss": 0.1006, + "num_input_tokens_seen": 228311872, + "step": 2354 + }, + { + "epoch": 0.2301525224872898, + "loss": 0.08833163976669312, + "loss_ce": 0.006117286626249552, + "loss_iou": 0.345703125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 228311872, + "step": 2354 + }, + { + "epoch": 0.23025029331247557, + "grad_norm": 8.686118923363628, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 228408524, + "step": 2355 + }, + { + "epoch": 0.23025029331247557, + "loss": 0.0806407779455185, + "loss_ce": 0.003843288868665695, + "loss_iou": 0.29296875, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 228408524, + "step": 2355 + }, + { + "epoch": 0.23034806413766132, + "grad_norm": 6.295694911029583, + "learning_rate": 5e-05, + "loss": 0.0954, + "num_input_tokens_seen": 228506208, + "step": 2356 + }, + { + "epoch": 0.23034806413766132, + "loss": 0.10387638211250305, + "loss_ce": 0.009302401915192604, + "loss_iou": 0.30859375, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 228506208, + "step": 2356 + }, + { + "epoch": 0.2304458349628471, + "grad_norm": 8.827469795602239, + "learning_rate": 5e-05, + "loss": 0.1025, + "num_input_tokens_seen": 228603476, + "step": 2357 + }, + { + "epoch": 0.2304458349628471, + "loss": 0.07962758094072342, + "loss_ce": 0.003638810943812132, + "loss_iou": 0.41015625, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 228603476, + "step": 2357 + }, + { + "epoch": 0.23054360578803285, + "grad_norm": 16.50662239046423, + "learning_rate": 5e-05, + "loss": 0.1071, + "num_input_tokens_seen": 228700756, + "step": 2358 + }, + { + "epoch": 0.23054360578803285, + "loss": 0.09662991017103195, + "loss_ce": 0.001231953501701355, + "loss_iou": 0.3671875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 228700756, + "step": 2358 + }, + { + "epoch": 0.23064137661321862, + "grad_norm": 25.04390495945313, + "learning_rate": 5e-05, + "loss": 0.1311, + "num_input_tokens_seen": 228797936, + "step": 2359 + }, + { + "epoch": 0.23064137661321862, + "loss": 0.15486536920070648, + "loss_ce": 0.004047502297908068, + "loss_iou": 0.41796875, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 228797936, + "step": 2359 + }, + { + "epoch": 0.23073914743840437, + "grad_norm": 8.558374130714544, + "learning_rate": 5e-05, + "loss": 0.0892, + "num_input_tokens_seen": 228895544, + "step": 2360 + }, + { + "epoch": 0.23073914743840437, + "loss": 0.07067792117595673, + "loss_ce": 0.002318542916327715, + "loss_iou": 0.3984375, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 228895544, + "step": 2360 + }, + { + "epoch": 0.23083691826359015, + "grad_norm": 5.219733351973028, + "learning_rate": 5e-05, + "loss": 0.0938, + "num_input_tokens_seen": 228992612, + "step": 2361 + }, + { + "epoch": 0.23083691826359015, + "loss": 0.0924805998802185, + "loss_ce": 0.006119672209024429, + "loss_iou": 0.310546875, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 228992612, + "step": 2361 + }, + { + "epoch": 0.2309346890887759, + "grad_norm": 17.24128141735477, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 229090060, + "step": 2362 + }, + { + "epoch": 0.2309346890887759, + "loss": 0.10467052459716797, + "loss_ce": 0.0026655213441699743, + "loss_iou": 0.236328125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 229090060, + "step": 2362 + }, + { + "epoch": 0.23103245991396168, + "grad_norm": 10.234545857392694, + "learning_rate": 5e-05, + "loss": 0.104, + "num_input_tokens_seen": 229187668, + "step": 2363 + }, + { + "epoch": 0.23103245991396168, + "loss": 0.1091012954711914, + "loss_ce": 0.005059241317212582, + "loss_iou": 0.29296875, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 229187668, + "step": 2363 + }, + { + "epoch": 0.23113023073914743, + "grad_norm": 11.709228564854293, + "learning_rate": 5e-05, + "loss": 0.1284, + "num_input_tokens_seen": 229285096, + "step": 2364 + }, + { + "epoch": 0.23113023073914743, + "loss": 0.11195030808448792, + "loss_ce": 0.0037349748890846968, + "loss_iou": 0.34765625, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 229285096, + "step": 2364 + }, + { + "epoch": 0.2312280015643332, + "grad_norm": 19.495451159154825, + "learning_rate": 5e-05, + "loss": 0.1147, + "num_input_tokens_seen": 229382804, + "step": 2365 + }, + { + "epoch": 0.2312280015643332, + "loss": 0.08485373854637146, + "loss_ce": 0.0017543775029480457, + "loss_iou": 0.5078125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 229382804, + "step": 2365 + }, + { + "epoch": 0.23132577238951896, + "grad_norm": 11.265798376970837, + "learning_rate": 5e-05, + "loss": 0.1355, + "num_input_tokens_seen": 229478748, + "step": 2366 + }, + { + "epoch": 0.23132577238951896, + "loss": 0.16499149799346924, + "loss_ce": 0.0028210815507918596, + "loss_iou": 0.255859375, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 229478748, + "step": 2366 + }, + { + "epoch": 0.23142354321470474, + "grad_norm": 15.5572846459296, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 229575364, + "step": 2367 + }, + { + "epoch": 0.23142354321470474, + "loss": 0.08761294186115265, + "loss_ce": 0.0077636935748159885, + "loss_iou": 0.333984375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 229575364, + "step": 2367 + }, + { + "epoch": 0.23152131403989049, + "grad_norm": 12.75233126585603, + "learning_rate": 5e-05, + "loss": 0.0942, + "num_input_tokens_seen": 229672924, + "step": 2368 + }, + { + "epoch": 0.23152131403989049, + "loss": 0.1295982301235199, + "loss_ce": 0.008489231579005718, + "loss_iou": 0.2412109375, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 229672924, + "step": 2368 + }, + { + "epoch": 0.23161908486507626, + "grad_norm": 9.082325583675686, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 229769748, + "step": 2369 + }, + { + "epoch": 0.23161908486507626, + "loss": 0.09734505414962769, + "loss_ce": 0.005334554240107536, + "loss_iou": 0.3046875, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 229769748, + "step": 2369 + }, + { + "epoch": 0.231716855690262, + "grad_norm": 2.581130014493191, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 229866136, + "step": 2370 + }, + { + "epoch": 0.231716855690262, + "loss": 0.06719280779361725, + "loss_ce": 0.005387080367654562, + "loss_iou": 0.244140625, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 229866136, + "step": 2370 + }, + { + "epoch": 0.2318146265154478, + "grad_norm": 2.541919504973588, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 229963788, + "step": 2371 + }, + { + "epoch": 0.2318146265154478, + "loss": 0.08375377953052521, + "loss_ce": 0.0040113432332873344, + "loss_iou": 0.4296875, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 229963788, + "step": 2371 + }, + { + "epoch": 0.23191239734063354, + "grad_norm": 6.099325915333089, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 230061648, + "step": 2372 + }, + { + "epoch": 0.23191239734063354, + "loss": 0.05514635518193245, + "loss_ce": 0.005097529850900173, + "loss_iou": 0.2060546875, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 230061648, + "step": 2372 + }, + { + "epoch": 0.23201016816581932, + "grad_norm": 4.943802600880462, + "learning_rate": 5e-05, + "loss": 0.1278, + "num_input_tokens_seen": 230158060, + "step": 2373 + }, + { + "epoch": 0.23201016816581932, + "loss": 0.1273876279592514, + "loss_ce": 0.011603938415646553, + "loss_iou": 0.296875, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 230158060, + "step": 2373 + }, + { + "epoch": 0.2321079389910051, + "grad_norm": 8.275471152493651, + "learning_rate": 5e-05, + "loss": 0.0912, + "num_input_tokens_seen": 230255012, + "step": 2374 + }, + { + "epoch": 0.2321079389910051, + "loss": 0.06454180926084518, + "loss_ce": 0.005490293260663748, + "loss_iou": 0.29296875, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 230255012, + "step": 2374 + }, + { + "epoch": 0.23220570981619085, + "grad_norm": 3.6930582874400297, + "learning_rate": 5e-05, + "loss": 0.1011, + "num_input_tokens_seen": 230352744, + "step": 2375 + }, + { + "epoch": 0.23220570981619085, + "loss": 0.12006769329309464, + "loss_ce": 0.0071679092943668365, + "loss_iou": 0.341796875, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 230352744, + "step": 2375 + }, + { + "epoch": 0.23230348064137663, + "grad_norm": 6.8826149376229155, + "learning_rate": 5e-05, + "loss": 0.1202, + "num_input_tokens_seen": 230449620, + "step": 2376 + }, + { + "epoch": 0.23230348064137663, + "loss": 0.11296156048774719, + "loss_ce": 0.005417609587311745, + "loss_iou": 0.373046875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 230449620, + "step": 2376 + }, + { + "epoch": 0.23240125146656238, + "grad_norm": 9.376743124127321, + "learning_rate": 5e-05, + "loss": 0.1108, + "num_input_tokens_seen": 230546756, + "step": 2377 + }, + { + "epoch": 0.23240125146656238, + "loss": 0.08459001779556274, + "loss_ce": 0.006312429439276457, + "loss_iou": 0.443359375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 230546756, + "step": 2377 + }, + { + "epoch": 0.23249902229174815, + "grad_norm": 13.030376342038068, + "learning_rate": 5e-05, + "loss": 0.0927, + "num_input_tokens_seen": 230644244, + "step": 2378 + }, + { + "epoch": 0.23249902229174815, + "loss": 0.08601874113082886, + "loss_ce": 0.003773867152631283, + "loss_iou": 0.37890625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 230644244, + "step": 2378 + }, + { + "epoch": 0.2325967931169339, + "grad_norm": 20.178239408370864, + "learning_rate": 5e-05, + "loss": 0.0958, + "num_input_tokens_seen": 230741612, + "step": 2379 + }, + { + "epoch": 0.2325967931169339, + "loss": 0.09250777959823608, + "loss_ce": 0.004128870088607073, + "loss_iou": 0.51953125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 230741612, + "step": 2379 + }, + { + "epoch": 0.23269456394211968, + "grad_norm": 20.81437122554868, + "learning_rate": 5e-05, + "loss": 0.1067, + "num_input_tokens_seen": 230838628, + "step": 2380 + }, + { + "epoch": 0.23269456394211968, + "loss": 0.08879300951957703, + "loss_ce": 0.010133953765034676, + "loss_iou": 0.3125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 230838628, + "step": 2380 + }, + { + "epoch": 0.23279233476730543, + "grad_norm": 10.712849698714516, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 230935928, + "step": 2381 + }, + { + "epoch": 0.23279233476730543, + "loss": 0.07414739578962326, + "loss_ce": 0.006550958380103111, + "loss_iou": 0.43359375, + "loss_num": 0.0135498046875, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 230935928, + "step": 2381 + }, + { + "epoch": 0.2328901055924912, + "grad_norm": 23.575109670717993, + "learning_rate": 5e-05, + "loss": 0.1398, + "num_input_tokens_seen": 231033552, + "step": 2382 + }, + { + "epoch": 0.2328901055924912, + "loss": 0.16593247652053833, + "loss_ce": 0.011101551353931427, + "loss_iou": 0.34375, + "loss_num": 0.0308837890625, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 231033552, + "step": 2382 + }, + { + "epoch": 0.23298787641767696, + "grad_norm": 14.004802566918624, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 231130368, + "step": 2383 + }, + { + "epoch": 0.23298787641767696, + "loss": 0.11292065680027008, + "loss_ce": 0.007177244871854782, + "loss_iou": 0.34375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 231130368, + "step": 2383 + }, + { + "epoch": 0.23308564724286274, + "grad_norm": 7.251174197006828, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 231227444, + "step": 2384 + }, + { + "epoch": 0.23308564724286274, + "loss": 0.057082995772361755, + "loss_ce": 0.004291400779038668, + "loss_iou": 0.24609375, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 231227444, + "step": 2384 + }, + { + "epoch": 0.2331834180680485, + "grad_norm": 7.364112597326647, + "learning_rate": 5e-05, + "loss": 0.0922, + "num_input_tokens_seen": 231324452, + "step": 2385 + }, + { + "epoch": 0.2331834180680485, + "loss": 0.1098579615354538, + "loss_ce": 0.004236623644828796, + "loss_iou": 0.33984375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 231324452, + "step": 2385 + }, + { + "epoch": 0.23328118889323426, + "grad_norm": 2.7210271459822577, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 231421464, + "step": 2386 + }, + { + "epoch": 0.23328118889323426, + "loss": 0.0678376853466034, + "loss_ce": 0.006558386608958244, + "loss_iou": 0.314453125, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 231421464, + "step": 2386 + }, + { + "epoch": 0.23337895971842001, + "grad_norm": 8.54537777748483, + "learning_rate": 5e-05, + "loss": 0.0944, + "num_input_tokens_seen": 231518144, + "step": 2387 + }, + { + "epoch": 0.23337895971842001, + "loss": 0.11662045121192932, + "loss_ce": 0.0045446427538990974, + "loss_iou": 0.34765625, + "loss_num": 0.0223388671875, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 231518144, + "step": 2387 + }, + { + "epoch": 0.2334767305436058, + "grad_norm": 13.560553143394804, + "learning_rate": 5e-05, + "loss": 0.1058, + "num_input_tokens_seen": 231615156, + "step": 2388 + }, + { + "epoch": 0.2334767305436058, + "loss": 0.10922961682081223, + "loss_ce": 0.005256232805550098, + "loss_iou": 0.328125, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 231615156, + "step": 2388 + }, + { + "epoch": 0.23357450136879154, + "grad_norm": 6.2688972525569255, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 231712232, + "step": 2389 + }, + { + "epoch": 0.23357450136879154, + "loss": 0.0629098191857338, + "loss_ce": 0.0025841952301561832, + "loss_iou": 0.283203125, + "loss_num": 0.01202392578125, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 231712232, + "step": 2389 + }, + { + "epoch": 0.23367227219397732, + "grad_norm": 7.476796138202018, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 231809628, + "step": 2390 + }, + { + "epoch": 0.23367227219397732, + "loss": 0.08382914960384369, + "loss_ce": 0.008710135705769062, + "loss_iou": 0.330078125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 231809628, + "step": 2390 + }, + { + "epoch": 0.23377004301916307, + "grad_norm": 7.70638511224556, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 231906100, + "step": 2391 + }, + { + "epoch": 0.23377004301916307, + "loss": 0.05832827091217041, + "loss_ce": 0.00476229190826416, + "loss_iou": 0.271484375, + "loss_num": 0.0107421875, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 231906100, + "step": 2391 + }, + { + "epoch": 0.23386781384434885, + "grad_norm": 11.816049067673477, + "learning_rate": 5e-05, + "loss": 0.0921, + "num_input_tokens_seen": 232003324, + "step": 2392 + }, + { + "epoch": 0.23386781384434885, + "loss": 0.06783342361450195, + "loss_ce": 0.005684378556907177, + "loss_iou": 0.4453125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 232003324, + "step": 2392 + }, + { + "epoch": 0.2339655846695346, + "grad_norm": 13.283092252849974, + "learning_rate": 5e-05, + "loss": 0.1138, + "num_input_tokens_seen": 232100536, + "step": 2393 + }, + { + "epoch": 0.2339655846695346, + "loss": 0.09759198129177094, + "loss_ce": 0.005268671549856663, + "loss_iou": 0.322265625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 232100536, + "step": 2393 + }, + { + "epoch": 0.23406335549472038, + "grad_norm": 8.776909313503067, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 232197620, + "step": 2394 + }, + { + "epoch": 0.23406335549472038, + "loss": 0.11236842721700668, + "loss_ce": 0.004443006590008736, + "loss_iou": 0.34765625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 232197620, + "step": 2394 + }, + { + "epoch": 0.23416112631990613, + "grad_norm": 13.172680850263369, + "learning_rate": 5e-05, + "loss": 0.113, + "num_input_tokens_seen": 232295332, + "step": 2395 + }, + { + "epoch": 0.23416112631990613, + "loss": 0.10182944685220718, + "loss_ce": 0.005515966098755598, + "loss_iou": 0.26953125, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 232295332, + "step": 2395 + }, + { + "epoch": 0.2342588971450919, + "grad_norm": 9.120594004293617, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 232392020, + "step": 2396 + }, + { + "epoch": 0.2342588971450919, + "loss": 0.06321656703948975, + "loss_ce": 0.005935076158493757, + "loss_iou": 0.412109375, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 232392020, + "step": 2396 + }, + { + "epoch": 0.23435666797027768, + "grad_norm": 9.756617235098284, + "learning_rate": 5e-05, + "loss": 0.1074, + "num_input_tokens_seen": 232488736, + "step": 2397 + }, + { + "epoch": 0.23435666797027768, + "loss": 0.10426787286996841, + "loss_ce": 0.005467214621603489, + "loss_iou": 0.392578125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 232488736, + "step": 2397 + }, + { + "epoch": 0.23445443879546343, + "grad_norm": 14.06234858653447, + "learning_rate": 5e-05, + "loss": 0.0965, + "num_input_tokens_seen": 232586364, + "step": 2398 + }, + { + "epoch": 0.23445443879546343, + "loss": 0.07447023689746857, + "loss_ce": 0.0022809109650552273, + "loss_iou": 0.345703125, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 232586364, + "step": 2398 + }, + { + "epoch": 0.2345522096206492, + "grad_norm": 16.95591202150145, + "learning_rate": 5e-05, + "loss": 0.0993, + "num_input_tokens_seen": 232683468, + "step": 2399 + }, + { + "epoch": 0.2345522096206492, + "loss": 0.09084704518318176, + "loss_ce": 0.004322076216340065, + "loss_iou": 0.337890625, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 232683468, + "step": 2399 + }, + { + "epoch": 0.23464998044583496, + "grad_norm": 4.1352910528356075, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 232780252, + "step": 2400 + }, + { + "epoch": 0.23464998044583496, + "loss": 0.07570198178291321, + "loss_ce": 0.006091384217143059, + "loss_iou": 0.478515625, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 232780252, + "step": 2400 + }, + { + "epoch": 0.23474775127102074, + "grad_norm": 8.71881854702898, + "learning_rate": 5e-05, + "loss": 0.0845, + "num_input_tokens_seen": 232876816, + "step": 2401 + }, + { + "epoch": 0.23474775127102074, + "loss": 0.07825088500976562, + "loss_ce": 0.004611973185092211, + "loss_iou": 0.296875, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 232876816, + "step": 2401 + }, + { + "epoch": 0.2348455220962065, + "grad_norm": 15.270632501384585, + "learning_rate": 5e-05, + "loss": 0.0985, + "num_input_tokens_seen": 232974360, + "step": 2402 + }, + { + "epoch": 0.2348455220962065, + "loss": 0.11665214598178864, + "loss_ce": 0.005476600956171751, + "loss_iou": 0.3515625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 232974360, + "step": 2402 + }, + { + "epoch": 0.23494329292139227, + "grad_norm": 2.7007846194923895, + "learning_rate": 5e-05, + "loss": 0.1063, + "num_input_tokens_seen": 233070860, + "step": 2403 + }, + { + "epoch": 0.23494329292139227, + "loss": 0.1298944056034088, + "loss_ce": 0.004802841227501631, + "loss_iou": 0.326171875, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 233070860, + "step": 2403 + }, + { + "epoch": 0.23504106374657802, + "grad_norm": 11.486738340103154, + "learning_rate": 5e-05, + "loss": 0.0709, + "num_input_tokens_seen": 233168160, + "step": 2404 + }, + { + "epoch": 0.23504106374657802, + "loss": 0.06599073112010956, + "loss_ce": 0.0018732987809926271, + "loss_iou": 0.423828125, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 233168160, + "step": 2404 + }, + { + "epoch": 0.2351388345717638, + "grad_norm": 7.784896110273643, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 233265420, + "step": 2405 + }, + { + "epoch": 0.2351388345717638, + "loss": 0.09222416579723358, + "loss_ce": 0.0050506992265582085, + "loss_iou": 0.361328125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 233265420, + "step": 2405 + }, + { + "epoch": 0.23523660539694954, + "grad_norm": 10.257143111207409, + "learning_rate": 5e-05, + "loss": 0.1056, + "num_input_tokens_seen": 233361672, + "step": 2406 + }, + { + "epoch": 0.23523660539694954, + "loss": 0.08886419236660004, + "loss_ce": 0.005093436688184738, + "loss_iou": 0.2734375, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 233361672, + "step": 2406 + }, + { + "epoch": 0.23533437622213532, + "grad_norm": 24.35741410229813, + "learning_rate": 5e-05, + "loss": 0.1111, + "num_input_tokens_seen": 233458100, + "step": 2407 + }, + { + "epoch": 0.23533437622213532, + "loss": 0.10638512670993805, + "loss_ce": 0.006043328903615475, + "loss_iou": 0.357421875, + "loss_num": 0.02001953125, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 233458100, + "step": 2407 + }, + { + "epoch": 0.23543214704732107, + "grad_norm": 5.692232046673268, + "learning_rate": 5e-05, + "loss": 0.1039, + "num_input_tokens_seen": 233554464, + "step": 2408 + }, + { + "epoch": 0.23543214704732107, + "loss": 0.11103459447622299, + "loss_ce": 0.00776310358196497, + "loss_iou": 0.3828125, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 233554464, + "step": 2408 + }, + { + "epoch": 0.23552991787250685, + "grad_norm": 3.7659329262523027, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 233650908, + "step": 2409 + }, + { + "epoch": 0.23552991787250685, + "loss": 0.07364127784967422, + "loss_ce": 0.0027184258215129375, + "loss_iou": 0.37890625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 233650908, + "step": 2409 + }, + { + "epoch": 0.2356276886976926, + "grad_norm": 18.2579598214879, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 233747996, + "step": 2410 + }, + { + "epoch": 0.2356276886976926, + "loss": 0.038448259234428406, + "loss_ce": 0.004245685879141092, + "loss_iou": 0.294921875, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 233747996, + "step": 2410 + }, + { + "epoch": 0.23572545952287838, + "grad_norm": 6.580648798689925, + "learning_rate": 5e-05, + "loss": 0.1021, + "num_input_tokens_seen": 233844972, + "step": 2411 + }, + { + "epoch": 0.23572545952287838, + "loss": 0.13631272315979004, + "loss_ce": 0.004690408706665039, + "loss_iou": 0.375, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 233844972, + "step": 2411 + }, + { + "epoch": 0.23582323034806413, + "grad_norm": 17.557279482759178, + "learning_rate": 5e-05, + "loss": 0.1019, + "num_input_tokens_seen": 233942448, + "step": 2412 + }, + { + "epoch": 0.23582323034806413, + "loss": 0.1283896118402481, + "loss_ce": 0.0032065045088529587, + "loss_iou": 0.33984375, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 233942448, + "step": 2412 + }, + { + "epoch": 0.2359210011732499, + "grad_norm": 13.695655606142196, + "learning_rate": 5e-05, + "loss": 0.0866, + "num_input_tokens_seen": 234039276, + "step": 2413 + }, + { + "epoch": 0.2359210011732499, + "loss": 0.0787927508354187, + "loss_ce": 0.009868798777461052, + "loss_iou": 0.310546875, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 234039276, + "step": 2413 + }, + { + "epoch": 0.23601877199843566, + "grad_norm": 3.2453564030819724, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 234136820, + "step": 2414 + }, + { + "epoch": 0.23601877199843566, + "loss": 0.1045624315738678, + "loss_ce": 0.00553289707750082, + "loss_iou": 0.283203125, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 234136820, + "step": 2414 + }, + { + "epoch": 0.23611654282362143, + "grad_norm": 3.331033625955296, + "learning_rate": 5e-05, + "loss": 0.0865, + "num_input_tokens_seen": 234233756, + "step": 2415 + }, + { + "epoch": 0.23611654282362143, + "loss": 0.09380178898572922, + "loss_ce": 0.0037901969626545906, + "loss_iou": 0.41796875, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 234233756, + "step": 2415 + }, + { + "epoch": 0.23621431364880718, + "grad_norm": 5.488579784027597, + "learning_rate": 5e-05, + "loss": 0.1099, + "num_input_tokens_seen": 234330884, + "step": 2416 + }, + { + "epoch": 0.23621431364880718, + "loss": 0.0951603502035141, + "loss_ce": 0.007727490738034248, + "loss_iou": 0.298828125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 234330884, + "step": 2416 + }, + { + "epoch": 0.23631208447399296, + "grad_norm": 10.87981630336449, + "learning_rate": 5e-05, + "loss": 0.0896, + "num_input_tokens_seen": 234427624, + "step": 2417 + }, + { + "epoch": 0.23631208447399296, + "loss": 0.08435241132974625, + "loss_ce": 0.004823603201657534, + "loss_iou": 0.376953125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 234427624, + "step": 2417 + }, + { + "epoch": 0.2364098552991787, + "grad_norm": 7.952630445412369, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 234524600, + "step": 2418 + }, + { + "epoch": 0.2364098552991787, + "loss": 0.05332999676465988, + "loss_ce": 0.00865226425230503, + "loss_iou": 0.30078125, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 234524600, + "step": 2418 + }, + { + "epoch": 0.2365076261243645, + "grad_norm": 7.189706453992019, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 234620944, + "step": 2419 + }, + { + "epoch": 0.2365076261243645, + "loss": 0.09712477028369904, + "loss_ce": 0.005678849760442972, + "loss_iou": 0.279296875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 234620944, + "step": 2419 + }, + { + "epoch": 0.23660539694955027, + "grad_norm": 21.84708012108915, + "learning_rate": 5e-05, + "loss": 0.1028, + "num_input_tokens_seen": 234718840, + "step": 2420 + }, + { + "epoch": 0.23660539694955027, + "loss": 0.09175440669059753, + "loss_ce": 0.009662117809057236, + "loss_iou": 0.462890625, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 234718840, + "step": 2420 + }, + { + "epoch": 0.23670316777473602, + "grad_norm": 19.88335672803981, + "learning_rate": 5e-05, + "loss": 0.1055, + "num_input_tokens_seen": 234815676, + "step": 2421 + }, + { + "epoch": 0.23670316777473602, + "loss": 0.11558495461940765, + "loss_ce": 0.00660668546333909, + "loss_iou": 0.341796875, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 234815676, + "step": 2421 + }, + { + "epoch": 0.2368009385999218, + "grad_norm": 2.150058943000897, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 234912252, + "step": 2422 + }, + { + "epoch": 0.2368009385999218, + "loss": 0.08601455390453339, + "loss_ce": 0.0051963794976472855, + "loss_iou": 0.30859375, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 234912252, + "step": 2422 + }, + { + "epoch": 0.23689870942510755, + "grad_norm": 6.006749390916857, + "learning_rate": 5e-05, + "loss": 0.0846, + "num_input_tokens_seen": 235008596, + "step": 2423 + }, + { + "epoch": 0.23689870942510755, + "loss": 0.08828110247850418, + "loss_ce": 0.008683627471327782, + "loss_iou": 0.2490234375, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 235008596, + "step": 2423 + }, + { + "epoch": 0.23699648025029332, + "grad_norm": 2.277152015680311, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 235105768, + "step": 2424 + }, + { + "epoch": 0.23699648025029332, + "loss": 0.04788979887962341, + "loss_ce": 0.001899809343740344, + "loss_iou": 0.279296875, + "loss_num": 0.00921630859375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 235105768, + "step": 2424 + }, + { + "epoch": 0.23709425107547907, + "grad_norm": 5.045085710586731, + "learning_rate": 5e-05, + "loss": 0.1069, + "num_input_tokens_seen": 235202900, + "step": 2425 + }, + { + "epoch": 0.23709425107547907, + "loss": 0.09661128371953964, + "loss_ce": 0.010093947872519493, + "loss_iou": 0.3125, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 235202900, + "step": 2425 + }, + { + "epoch": 0.23719202190066485, + "grad_norm": 6.388018508316817, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 235300392, + "step": 2426 + }, + { + "epoch": 0.23719202190066485, + "loss": 0.07097093760967255, + "loss_ce": 0.005754871293902397, + "loss_iou": 0.361328125, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 235300392, + "step": 2426 + }, + { + "epoch": 0.2372897927258506, + "grad_norm": 17.29563642789241, + "learning_rate": 5e-05, + "loss": 0.0999, + "num_input_tokens_seen": 235397032, + "step": 2427 + }, + { + "epoch": 0.2372897927258506, + "loss": 0.10177178680896759, + "loss_ce": 0.005916071590036154, + "loss_iou": 0.443359375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 235397032, + "step": 2427 + }, + { + "epoch": 0.23738756355103638, + "grad_norm": 4.019985386258096, + "learning_rate": 5e-05, + "loss": 0.0917, + "num_input_tokens_seen": 235493676, + "step": 2428 + }, + { + "epoch": 0.23738756355103638, + "loss": 0.10136379301548004, + "loss_ce": 0.006637236103415489, + "loss_iou": 0.333984375, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 235493676, + "step": 2428 + }, + { + "epoch": 0.23748533437622213, + "grad_norm": 14.554880194269055, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 235590456, + "step": 2429 + }, + { + "epoch": 0.23748533437622213, + "loss": 0.07397417724132538, + "loss_ce": 0.001830618130043149, + "loss_iou": 0.326171875, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 235590456, + "step": 2429 + }, + { + "epoch": 0.2375831052014079, + "grad_norm": 12.026428444284269, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 235688020, + "step": 2430 + }, + { + "epoch": 0.2375831052014079, + "loss": 0.08157958090305328, + "loss_ce": 0.0026000903453677893, + "loss_iou": 0.55859375, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 235688020, + "step": 2430 + }, + { + "epoch": 0.23768087602659366, + "grad_norm": 9.146868437037972, + "learning_rate": 5e-05, + "loss": 0.1052, + "num_input_tokens_seen": 235784912, + "step": 2431 + }, + { + "epoch": 0.23768087602659366, + "loss": 0.1259390115737915, + "loss_ce": 0.014153127558529377, + "loss_iou": 0.27734375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 235784912, + "step": 2431 + }, + { + "epoch": 0.23777864685177943, + "grad_norm": 6.937868963123714, + "learning_rate": 5e-05, + "loss": 0.096, + "num_input_tokens_seen": 235881464, + "step": 2432 + }, + { + "epoch": 0.23777864685177943, + "loss": 0.09826724976301193, + "loss_ce": 0.0058142454363405704, + "loss_iou": 0.369140625, + "loss_num": 0.0185546875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 235881464, + "step": 2432 + }, + { + "epoch": 0.23787641767696519, + "grad_norm": 3.185878621934139, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 235977808, + "step": 2433 + }, + { + "epoch": 0.23787641767696519, + "loss": 0.07199335098266602, + "loss_ce": 0.0042138113640248775, + "loss_iou": 0.31640625, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 235977808, + "step": 2433 + }, + { + "epoch": 0.23797418850215096, + "grad_norm": 4.429683433885201, + "learning_rate": 5e-05, + "loss": 0.0851, + "num_input_tokens_seen": 236074204, + "step": 2434 + }, + { + "epoch": 0.23797418850215096, + "loss": 0.09956706315279007, + "loss_ce": 0.01198161393404007, + "loss_iou": 0.2890625, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 236074204, + "step": 2434 + }, + { + "epoch": 0.2380719593273367, + "grad_norm": 16.17977062607371, + "learning_rate": 5e-05, + "loss": 0.1023, + "num_input_tokens_seen": 236170852, + "step": 2435 + }, + { + "epoch": 0.2380719593273367, + "loss": 0.1209758073091507, + "loss_ce": 0.008159944787621498, + "loss_iou": 0.271484375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 236170852, + "step": 2435 + }, + { + "epoch": 0.2381697301525225, + "grad_norm": 13.300900052121067, + "learning_rate": 5e-05, + "loss": 0.063, + "num_input_tokens_seen": 236269188, + "step": 2436 + }, + { + "epoch": 0.2381697301525225, + "loss": 0.062181368470191956, + "loss_ce": 0.005082979798316956, + "loss_iou": 0.44921875, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 236269188, + "step": 2436 + }, + { + "epoch": 0.23826750097770824, + "grad_norm": 8.414323595265724, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 236366436, + "step": 2437 + }, + { + "epoch": 0.23826750097770824, + "loss": 0.06075594574213028, + "loss_ce": 0.004740929696708918, + "loss_iou": 0.314453125, + "loss_num": 0.01123046875, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 236366436, + "step": 2437 + }, + { + "epoch": 0.23836527180289402, + "grad_norm": 22.974310768549458, + "learning_rate": 5e-05, + "loss": 0.0984, + "num_input_tokens_seen": 236462920, + "step": 2438 + }, + { + "epoch": 0.23836527180289402, + "loss": 0.11284761130809784, + "loss_ce": 0.005563067737966776, + "loss_iou": 0.31640625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 236462920, + "step": 2438 + }, + { + "epoch": 0.23846304262807977, + "grad_norm": 42.33863603636341, + "learning_rate": 5e-05, + "loss": 0.1292, + "num_input_tokens_seen": 236560764, + "step": 2439 + }, + { + "epoch": 0.23846304262807977, + "loss": 0.13790249824523926, + "loss_ce": 0.03885768726468086, + "loss_iou": 0.349609375, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 236560764, + "step": 2439 + }, + { + "epoch": 0.23856081345326555, + "grad_norm": 17.151393949395356, + "learning_rate": 5e-05, + "loss": 0.0986, + "num_input_tokens_seen": 236656688, + "step": 2440 + }, + { + "epoch": 0.23856081345326555, + "loss": 0.07442402839660645, + "loss_ce": 0.0030281557701528072, + "loss_iou": 0.33203125, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 236656688, + "step": 2440 + }, + { + "epoch": 0.2386585842784513, + "grad_norm": 2.9690939952733544, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 236753828, + "step": 2441 + }, + { + "epoch": 0.2386585842784513, + "loss": 0.0964440256357193, + "loss_ce": 0.005761044565588236, + "loss_iou": 0.337890625, + "loss_num": 0.01806640625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 236753828, + "step": 2441 + }, + { + "epoch": 0.23875635510363707, + "grad_norm": 18.24935034662943, + "learning_rate": 5e-05, + "loss": 0.1237, + "num_input_tokens_seen": 236848844, + "step": 2442 + }, + { + "epoch": 0.23875635510363707, + "loss": 0.1073075532913208, + "loss_ce": 0.004676932469010353, + "loss_iou": 0.3046875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 236848844, + "step": 2442 + }, + { + "epoch": 0.23885412592882285, + "grad_norm": 12.193392904471287, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 236945136, + "step": 2443 + }, + { + "epoch": 0.23885412592882285, + "loss": 0.08028081059455872, + "loss_ce": 0.004795586224645376, + "loss_iou": 0.287109375, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 236945136, + "step": 2443 + }, + { + "epoch": 0.2389518967540086, + "grad_norm": 16.122263868375825, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 237042328, + "step": 2444 + }, + { + "epoch": 0.2389518967540086, + "loss": 0.0899471640586853, + "loss_ce": 0.0068172793835401535, + "loss_iou": 0.42578125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 237042328, + "step": 2444 + }, + { + "epoch": 0.23904966757919438, + "grad_norm": 2.982388105011001, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 237139116, + "step": 2445 + }, + { + "epoch": 0.23904966757919438, + "loss": 0.05891786888241768, + "loss_ce": 0.0029486296698451042, + "loss_iou": 0.3828125, + "loss_num": 0.01123046875, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 237139116, + "step": 2445 + }, + { + "epoch": 0.23914743840438013, + "grad_norm": 56.73456630627092, + "learning_rate": 5e-05, + "loss": 0.1081, + "num_input_tokens_seen": 237236624, + "step": 2446 + }, + { + "epoch": 0.23914743840438013, + "loss": 0.10598913580179214, + "loss_ce": 0.01074377540498972, + "loss_iou": 0.388671875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 237236624, + "step": 2446 + }, + { + "epoch": 0.2392452092295659, + "grad_norm": 4.405087375169422, + "learning_rate": 5e-05, + "loss": 0.1148, + "num_input_tokens_seen": 237333480, + "step": 2447 + }, + { + "epoch": 0.2392452092295659, + "loss": 0.15410029888153076, + "loss_ce": 0.005250819958746433, + "loss_iou": 0.310546875, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 237333480, + "step": 2447 + }, + { + "epoch": 0.23934298005475166, + "grad_norm": 3.4663865474958757, + "learning_rate": 5e-05, + "loss": 0.0772, + "num_input_tokens_seen": 237430420, + "step": 2448 + }, + { + "epoch": 0.23934298005475166, + "loss": 0.10296225547790527, + "loss_ce": 0.00440573412925005, + "loss_iou": 0.373046875, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 237430420, + "step": 2448 + }, + { + "epoch": 0.23944075087993744, + "grad_norm": 3.1860306842097725, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 237526628, + "step": 2449 + }, + { + "epoch": 0.23944075087993744, + "loss": 0.0807388424873352, + "loss_ce": 0.006947340909391642, + "loss_iou": 0.26953125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 237526628, + "step": 2449 + }, + { + "epoch": 0.2395385217051232, + "grad_norm": 2.9332894079716536, + "learning_rate": 5e-05, + "loss": 0.0743, + "num_input_tokens_seen": 237623192, + "step": 2450 + }, + { + "epoch": 0.2395385217051232, + "loss": 0.04628314822912216, + "loss_ce": 0.004351994954049587, + "loss_iou": 0.37890625, + "loss_num": 0.00836181640625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 237623192, + "step": 2450 + }, + { + "epoch": 0.23963629253030896, + "grad_norm": 13.684233190710826, + "learning_rate": 5e-05, + "loss": 0.1282, + "num_input_tokens_seen": 237720728, + "step": 2451 + }, + { + "epoch": 0.23963629253030896, + "loss": 0.11555741727352142, + "loss_ce": 0.005144826602190733, + "loss_iou": 0.408203125, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 237720728, + "step": 2451 + }, + { + "epoch": 0.23973406335549471, + "grad_norm": 5.030394309139214, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 237817620, + "step": 2452 + }, + { + "epoch": 0.23973406335549471, + "loss": 0.07355140894651413, + "loss_ce": 0.002628554590046406, + "loss_iou": 0.390625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 237817620, + "step": 2452 + }, + { + "epoch": 0.2398318341806805, + "grad_norm": 2.1516732811076706, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 237913988, + "step": 2453 + }, + { + "epoch": 0.2398318341806805, + "loss": 0.1261126697063446, + "loss_ce": 0.009916996583342552, + "loss_iou": 0.275390625, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 237913988, + "step": 2453 + }, + { + "epoch": 0.23992960500586624, + "grad_norm": 10.813266678751638, + "learning_rate": 5e-05, + "loss": 0.097, + "num_input_tokens_seen": 238011008, + "step": 2454 + }, + { + "epoch": 0.23992960500586624, + "loss": 0.10689367353916168, + "loss_ce": 0.005239618942141533, + "loss_iou": 0.412109375, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 238011008, + "step": 2454 + }, + { + "epoch": 0.24002737583105202, + "grad_norm": 27.826786700937653, + "learning_rate": 5e-05, + "loss": 0.1168, + "num_input_tokens_seen": 238107824, + "step": 2455 + }, + { + "epoch": 0.24002737583105202, + "loss": 0.11013685911893845, + "loss_ce": 0.004729150794446468, + "loss_iou": 0.3671875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 238107824, + "step": 2455 + }, + { + "epoch": 0.24012514665623777, + "grad_norm": 11.70317992105103, + "learning_rate": 5e-05, + "loss": 0.0873, + "num_input_tokens_seen": 238204932, + "step": 2456 + }, + { + "epoch": 0.24012514665623777, + "loss": 0.09094120562076569, + "loss_ce": 0.003538859076797962, + "loss_iou": 0.41796875, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 238204932, + "step": 2456 + }, + { + "epoch": 0.24022291748142355, + "grad_norm": 4.211031038209204, + "learning_rate": 5e-05, + "loss": 0.1093, + "num_input_tokens_seen": 238302340, + "step": 2457 + }, + { + "epoch": 0.24022291748142355, + "loss": 0.13753806054592133, + "loss_ce": 0.0035353777930140495, + "loss_iou": 0.44921875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 238302340, + "step": 2457 + }, + { + "epoch": 0.2403206883066093, + "grad_norm": 9.090158181955452, + "learning_rate": 5e-05, + "loss": 0.0949, + "num_input_tokens_seen": 238399460, + "step": 2458 + }, + { + "epoch": 0.2403206883066093, + "loss": 0.11624880880117416, + "loss_ce": 0.006568627431988716, + "loss_iou": 0.384765625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 238399460, + "step": 2458 + }, + { + "epoch": 0.24041845913179508, + "grad_norm": 5.404304012279317, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 238496528, + "step": 2459 + }, + { + "epoch": 0.24041845913179508, + "loss": 0.06813715398311615, + "loss_ce": 0.004736886825412512, + "loss_iou": 0.3515625, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 238496528, + "step": 2459 + }, + { + "epoch": 0.24051622995698083, + "grad_norm": 5.375590080479122, + "learning_rate": 5e-05, + "loss": 0.1155, + "num_input_tokens_seen": 238592784, + "step": 2460 + }, + { + "epoch": 0.24051622995698083, + "loss": 0.09344090521335602, + "loss_ce": 0.0034293062053620815, + "loss_iou": 0.287109375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 238592784, + "step": 2460 + }, + { + "epoch": 0.2406140007821666, + "grad_norm": 7.959163903826873, + "learning_rate": 5e-05, + "loss": 0.1006, + "num_input_tokens_seen": 238689636, + "step": 2461 + }, + { + "epoch": 0.2406140007821666, + "loss": 0.09189170598983765, + "loss_ce": 0.004306253045797348, + "loss_iou": 0.396484375, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 238689636, + "step": 2461 + }, + { + "epoch": 0.24071177160735235, + "grad_norm": 6.087008725378361, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 238786740, + "step": 2462 + }, + { + "epoch": 0.24071177160735235, + "loss": 0.08002211153507233, + "loss_ce": 0.0044300733134150505, + "loss_iou": 0.34765625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 238786740, + "step": 2462 + }, + { + "epoch": 0.24080954243253813, + "grad_norm": 11.229787225001667, + "learning_rate": 5e-05, + "loss": 0.0972, + "num_input_tokens_seen": 238884888, + "step": 2463 + }, + { + "epoch": 0.24080954243253813, + "loss": 0.09261922538280487, + "loss_ce": 0.0014327013632282615, + "loss_iou": 0.41015625, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 238884888, + "step": 2463 + }, + { + "epoch": 0.24090731325772388, + "grad_norm": 15.63742023626929, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 238981764, + "step": 2464 + }, + { + "epoch": 0.24090731325772388, + "loss": 0.10217928141355515, + "loss_ce": 0.004233113490045071, + "loss_iou": 0.31640625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 238981764, + "step": 2464 + }, + { + "epoch": 0.24100508408290966, + "grad_norm": 2.4381020331039642, + "learning_rate": 5e-05, + "loss": 0.0621, + "num_input_tokens_seen": 239079376, + "step": 2465 + }, + { + "epoch": 0.24100508408290966, + "loss": 0.07796043902635574, + "loss_ce": 0.002536246320232749, + "loss_iou": 0.369140625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 239079376, + "step": 2465 + }, + { + "epoch": 0.24110285490809544, + "grad_norm": 5.454852341428388, + "learning_rate": 5e-05, + "loss": 0.0881, + "num_input_tokens_seen": 239176540, + "step": 2466 + }, + { + "epoch": 0.24110285490809544, + "loss": 0.11803455650806427, + "loss_ce": 0.007225234992802143, + "loss_iou": 0.29296875, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 239176540, + "step": 2466 + }, + { + "epoch": 0.2412006257332812, + "grad_norm": 20.858090404976007, + "learning_rate": 5e-05, + "loss": 0.1228, + "num_input_tokens_seen": 239273380, + "step": 2467 + }, + { + "epoch": 0.2412006257332812, + "loss": 0.13215816020965576, + "loss_ce": 0.006608844734728336, + "loss_iou": 0.3125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 239273380, + "step": 2467 + }, + { + "epoch": 0.24129839655846697, + "grad_norm": 7.379672747173339, + "learning_rate": 5e-05, + "loss": 0.0851, + "num_input_tokens_seen": 239370220, + "step": 2468 + }, + { + "epoch": 0.24129839655846697, + "loss": 0.09538351744413376, + "loss_ce": 0.009110324084758759, + "loss_iou": 0.404296875, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 239370220, + "step": 2468 + }, + { + "epoch": 0.24139616738365272, + "grad_norm": 2.0821347007553315, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 239466100, + "step": 2469 + }, + { + "epoch": 0.24139616738365272, + "loss": 0.05303116515278816, + "loss_ce": 0.0032264781184494495, + "loss_iou": 0.296875, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 239466100, + "step": 2469 + }, + { + "epoch": 0.2414939382088385, + "grad_norm": 6.757356084519498, + "learning_rate": 5e-05, + "loss": 0.0718, + "num_input_tokens_seen": 239562272, + "step": 2470 + }, + { + "epoch": 0.2414939382088385, + "loss": 0.08177734911441803, + "loss_ce": 0.004934094380587339, + "loss_iou": 0.244140625, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 239562272, + "step": 2470 + }, + { + "epoch": 0.24159170903402424, + "grad_norm": 7.831994872571645, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 239660516, + "step": 2471 + }, + { + "epoch": 0.24159170903402424, + "loss": 0.09976276755332947, + "loss_ce": 0.0034798122942447662, + "loss_iou": 0.4140625, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 239660516, + "step": 2471 + }, + { + "epoch": 0.24168947985921002, + "grad_norm": 9.043630894276184, + "learning_rate": 5e-05, + "loss": 0.1288, + "num_input_tokens_seen": 239756892, + "step": 2472 + }, + { + "epoch": 0.24168947985921002, + "loss": 0.10241232812404633, + "loss_ce": 0.005572418682277203, + "loss_iou": 0.283203125, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 239756892, + "step": 2472 + }, + { + "epoch": 0.24178725068439577, + "grad_norm": 3.3253010294627443, + "learning_rate": 5e-05, + "loss": 0.0986, + "num_input_tokens_seen": 239853988, + "step": 2473 + }, + { + "epoch": 0.24178725068439577, + "loss": 0.09701946377754211, + "loss_ce": 0.006717951036989689, + "loss_iou": 0.390625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 239853988, + "step": 2473 + }, + { + "epoch": 0.24188502150958155, + "grad_norm": 5.4130105948798155, + "learning_rate": 5e-05, + "loss": 0.1171, + "num_input_tokens_seen": 239951300, + "step": 2474 + }, + { + "epoch": 0.24188502150958155, + "loss": 0.1496400386095047, + "loss_ce": 0.005322406068444252, + "loss_iou": 0.337890625, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 239951300, + "step": 2474 + }, + { + "epoch": 0.2419827923347673, + "grad_norm": 3.580832387011587, + "learning_rate": 5e-05, + "loss": 0.113, + "num_input_tokens_seen": 240047596, + "step": 2475 + }, + { + "epoch": 0.2419827923347673, + "loss": 0.11284423619508743, + "loss_ce": 0.005681759677827358, + "loss_iou": 0.3046875, + "loss_num": 0.021484375, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 240047596, + "step": 2475 + }, + { + "epoch": 0.24208056315995308, + "grad_norm": 3.9699739590819694, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 240144740, + "step": 2476 + }, + { + "epoch": 0.24208056315995308, + "loss": 0.07967371493577957, + "loss_ce": 0.003959605470299721, + "loss_iou": 0.359375, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 240144740, + "step": 2476 + }, + { + "epoch": 0.24217833398513883, + "grad_norm": 5.076410551373708, + "learning_rate": 5e-05, + "loss": 0.1046, + "num_input_tokens_seen": 240240224, + "step": 2477 + }, + { + "epoch": 0.24217833398513883, + "loss": 0.10829201340675354, + "loss_ce": 0.0049823797307908535, + "loss_iou": 0.2353515625, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 240240224, + "step": 2477 + }, + { + "epoch": 0.2422761048103246, + "grad_norm": 3.6031776801299755, + "learning_rate": 5e-05, + "loss": 0.1285, + "num_input_tokens_seen": 240335776, + "step": 2478 + }, + { + "epoch": 0.2422761048103246, + "loss": 0.16064882278442383, + "loss_ce": 0.011303430423140526, + "loss_iou": 0.21875, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 240335776, + "step": 2478 + }, + { + "epoch": 0.24237387563551036, + "grad_norm": 10.004323716030084, + "learning_rate": 5e-05, + "loss": 0.0856, + "num_input_tokens_seen": 240432772, + "step": 2479 + }, + { + "epoch": 0.24237387563551036, + "loss": 0.0979713723063469, + "loss_ce": 0.00429766159504652, + "loss_iou": 0.330078125, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 240432772, + "step": 2479 + }, + { + "epoch": 0.24247164646069613, + "grad_norm": 4.607677838301546, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 240529772, + "step": 2480 + }, + { + "epoch": 0.24247164646069613, + "loss": 0.11320209503173828, + "loss_ce": 0.0069093648344278336, + "loss_iou": 0.40234375, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 240529772, + "step": 2480 + }, + { + "epoch": 0.24256941728588188, + "grad_norm": 14.0202046004871, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 240626680, + "step": 2481 + }, + { + "epoch": 0.24256941728588188, + "loss": 0.052681129425764084, + "loss_ce": 0.004074258729815483, + "loss_iou": 0.390625, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 240626680, + "step": 2481 + }, + { + "epoch": 0.24266718811106766, + "grad_norm": 10.745626157442867, + "learning_rate": 5e-05, + "loss": 0.0984, + "num_input_tokens_seen": 240723368, + "step": 2482 + }, + { + "epoch": 0.24266718811106766, + "loss": 0.1085299700498581, + "loss_ce": 0.00576965743675828, + "loss_iou": 0.30078125, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 240723368, + "step": 2482 + }, + { + "epoch": 0.2427649589362534, + "grad_norm": 4.3427801059559865, + "learning_rate": 5e-05, + "loss": 0.1017, + "num_input_tokens_seen": 240820636, + "step": 2483 + }, + { + "epoch": 0.2427649589362534, + "loss": 0.129660502076149, + "loss_ce": 0.012930769473314285, + "loss_iou": 0.240234375, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 240820636, + "step": 2483 + }, + { + "epoch": 0.2428627297614392, + "grad_norm": 8.521293367689681, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 240916872, + "step": 2484 + }, + { + "epoch": 0.2428627297614392, + "loss": 0.03664221242070198, + "loss_ce": 0.005743163637816906, + "loss_iou": 0.2255859375, + "loss_num": 0.00616455078125, + "loss_xval": 0.0308837890625, + "num_input_tokens_seen": 240916872, + "step": 2484 + }, + { + "epoch": 0.24296050058662494, + "grad_norm": 14.81921605452105, + "learning_rate": 5e-05, + "loss": 0.0827, + "num_input_tokens_seen": 241014088, + "step": 2485 + }, + { + "epoch": 0.24296050058662494, + "loss": 0.061043016612529755, + "loss_ce": 0.0029528108425438404, + "loss_iou": 0.3203125, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 241014088, + "step": 2485 + }, + { + "epoch": 0.24305827141181072, + "grad_norm": 15.659088039469582, + "learning_rate": 5e-05, + "loss": 0.1111, + "num_input_tokens_seen": 241111264, + "step": 2486 + }, + { + "epoch": 0.24305827141181072, + "loss": 0.14819878339767456, + "loss_ce": 0.004827210679650307, + "loss_iou": 0.392578125, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 241111264, + "step": 2486 + }, + { + "epoch": 0.24315604223699647, + "grad_norm": 12.058751386218228, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 241208512, + "step": 2487 + }, + { + "epoch": 0.24315604223699647, + "loss": 0.0620865672826767, + "loss_ce": 0.006391985807567835, + "loss_iou": 0.30078125, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 241208512, + "step": 2487 + }, + { + "epoch": 0.24325381306218224, + "grad_norm": 4.851651982371493, + "learning_rate": 5e-05, + "loss": 0.1127, + "num_input_tokens_seen": 241304724, + "step": 2488 + }, + { + "epoch": 0.24325381306218224, + "loss": 0.15008169412612915, + "loss_ce": 0.00508506316691637, + "loss_iou": 0.271484375, + "loss_num": 0.029052734375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 241304724, + "step": 2488 + }, + { + "epoch": 0.24335158388736802, + "grad_norm": 3.072402938230723, + "learning_rate": 5e-05, + "loss": 0.1171, + "num_input_tokens_seen": 241401452, + "step": 2489 + }, + { + "epoch": 0.24335158388736802, + "loss": 0.09991658478975296, + "loss_ce": 0.007936605252325535, + "loss_iou": 0.3515625, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 241401452, + "step": 2489 + }, + { + "epoch": 0.24344935471255377, + "grad_norm": 12.77494241570453, + "learning_rate": 5e-05, + "loss": 0.1103, + "num_input_tokens_seen": 241497640, + "step": 2490 + }, + { + "epoch": 0.24344935471255377, + "loss": 0.10431502759456635, + "loss_ce": 0.005727986339479685, + "loss_iou": 0.26171875, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 241497640, + "step": 2490 + }, + { + "epoch": 0.24354712553773955, + "grad_norm": 32.11552863185334, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 241593468, + "step": 2491 + }, + { + "epoch": 0.24354712553773955, + "loss": 0.049980126321315765, + "loss_ce": 0.005485495552420616, + "loss_iou": 0.2001953125, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 241593468, + "step": 2491 + }, + { + "epoch": 0.2436448963629253, + "grad_norm": 5.603226693932682, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 241689812, + "step": 2492 + }, + { + "epoch": 0.2436448963629253, + "loss": 0.058884456753730774, + "loss_ce": 0.007912471890449524, + "loss_iou": 0.31640625, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 241689812, + "step": 2492 + }, + { + "epoch": 0.24374266718811108, + "grad_norm": 14.342952872949287, + "learning_rate": 5e-05, + "loss": 0.1006, + "num_input_tokens_seen": 241786000, + "step": 2493 + }, + { + "epoch": 0.24374266718811108, + "loss": 0.11003781855106354, + "loss_ce": 0.008963599801063538, + "loss_iou": 0.310546875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 241786000, + "step": 2493 + }, + { + "epoch": 0.24384043801329683, + "grad_norm": 5.360321631377379, + "learning_rate": 5e-05, + "loss": 0.1096, + "num_input_tokens_seen": 241882576, + "step": 2494 + }, + { + "epoch": 0.24384043801329683, + "loss": 0.07075206190347672, + "loss_ce": 0.00352184078656137, + "loss_iou": 0.384765625, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 241882576, + "step": 2494 + }, + { + "epoch": 0.2439382088384826, + "grad_norm": 5.8719773976235565, + "learning_rate": 5e-05, + "loss": 0.1192, + "num_input_tokens_seen": 241978616, + "step": 2495 + }, + { + "epoch": 0.2439382088384826, + "loss": 0.12227799743413925, + "loss_ce": 0.005212568212300539, + "loss_iou": 0.376953125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 241978616, + "step": 2495 + }, + { + "epoch": 0.24403597966366836, + "grad_norm": 4.446838424806776, + "learning_rate": 5e-05, + "loss": 0.1343, + "num_input_tokens_seen": 242075656, + "step": 2496 + }, + { + "epoch": 0.24403597966366836, + "loss": 0.13211283087730408, + "loss_ce": 0.006761875469237566, + "loss_iou": 0.302734375, + "loss_num": 0.025146484375, + "loss_xval": 0.125, + "num_input_tokens_seen": 242075656, + "step": 2496 + }, + { + "epoch": 0.24413375048885413, + "grad_norm": 6.803195158521457, + "learning_rate": 5e-05, + "loss": 0.0996, + "num_input_tokens_seen": 242172536, + "step": 2497 + }, + { + "epoch": 0.24413375048885413, + "loss": 0.06573328375816345, + "loss_ce": 0.004576060920953751, + "loss_iou": 0.419921875, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 242172536, + "step": 2497 + }, + { + "epoch": 0.24423152131403988, + "grad_norm": 3.6148168105322553, + "learning_rate": 5e-05, + "loss": 0.1174, + "num_input_tokens_seen": 242269448, + "step": 2498 + }, + { + "epoch": 0.24423152131403988, + "loss": 0.16030974686145782, + "loss_ce": 0.008667895570397377, + "loss_iou": 0.216796875, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 242269448, + "step": 2498 + }, + { + "epoch": 0.24432929213922566, + "grad_norm": 10.277607370704681, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 242366792, + "step": 2499 + }, + { + "epoch": 0.24432929213922566, + "loss": 0.10159774124622345, + "loss_ce": 0.006443932186812162, + "loss_iou": 0.3828125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 242366792, + "step": 2499 + }, + { + "epoch": 0.2444270629644114, + "grad_norm": 10.058432380526874, + "learning_rate": 5e-05, + "loss": 0.1098, + "num_input_tokens_seen": 242463624, + "step": 2500 + }, + { + "epoch": 0.2444270629644114, + "eval_seeclick_CIoU": 0.456955686211586, + "eval_seeclick_GIoU": 0.44493143260478973, + "eval_seeclick_IoU": 0.5038799345493317, + "eval_seeclick_MAE_all": 0.08154462277889252, + "eval_seeclick_MAE_h": 0.04633082076907158, + "eval_seeclick_MAE_w": 0.11531856283545494, + "eval_seeclick_MAE_x": 0.11909274011850357, + "eval_seeclick_MAE_y": 0.04543637856841087, + "eval_seeclick_NUM_probability": 0.9999823570251465, + "eval_seeclick_inside_bbox": 0.7386363744735718, + "eval_seeclick_loss": 0.29036325216293335, + "eval_seeclick_loss_ce": 0.009835743345320225, + "eval_seeclick_loss_iou": 0.4991455078125, + "eval_seeclick_loss_num": 0.058258056640625, + "eval_seeclick_loss_xval": 0.291168212890625, + "eval_seeclick_runtime": 73.2648, + "eval_seeclick_samples_per_second": 0.587, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 242463624, + "step": 2500 + }, + { + "epoch": 0.2444270629644114, + "eval_icons_CIoU": 0.6740144193172455, + "eval_icons_GIoU": 0.6794475317001343, + "eval_icons_IoU": 0.7033596038818359, + "eval_icons_MAE_all": 0.04739897698163986, + "eval_icons_MAE_h": 0.051479702815413475, + "eval_icons_MAE_w": 0.046280572190880775, + "eval_icons_MAE_x": 0.043273068964481354, + "eval_icons_MAE_y": 0.04856256954371929, + "eval_icons_NUM_probability": 0.9998749196529388, + "eval_icons_inside_bbox": 0.8315972089767456, + "eval_icons_loss": 0.16529425978660583, + "eval_icons_loss_ce": 3.973097227571998e-05, + "eval_icons_loss_iou": 0.414794921875, + "eval_icons_loss_num": 0.034740447998046875, + "eval_icons_loss_xval": 0.1737060546875, + "eval_icons_runtime": 86.8982, + "eval_icons_samples_per_second": 0.575, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 242463624, + "step": 2500 + }, + { + "epoch": 0.2444270629644114, + "eval_screenspot_CIoU": 0.3220541427532832, + "eval_screenspot_GIoU": 0.2988821119070053, + "eval_screenspot_IoU": 0.4150758882363637, + "eval_screenspot_MAE_all": 0.15952625373999277, + "eval_screenspot_MAE_h": 0.1172297125061353, + "eval_screenspot_MAE_w": 0.22100184857845306, + "eval_screenspot_MAE_x": 0.19023163616657257, + "eval_screenspot_MAE_y": 0.1096418301264445, + "eval_screenspot_NUM_probability": 0.9999736150105795, + "eval_screenspot_inside_bbox": 0.6612499952316284, + "eval_screenspot_loss": 0.5648451447486877, + "eval_screenspot_loss_ce": 0.03567525868614515, + "eval_screenspot_loss_iou": 0.3648274739583333, + "eval_screenspot_loss_num": 0.10643513997395833, + "eval_screenspot_loss_xval": 0.5321451822916666, + "eval_screenspot_runtime": 144.7464, + "eval_screenspot_samples_per_second": 0.615, + "eval_screenspot_steps_per_second": 0.021, + "num_input_tokens_seen": 242463624, + "step": 2500 + }, + { + "epoch": 0.2444270629644114, + "eval_compot_CIoU": 0.4751313179731369, + "eval_compot_GIoU": 0.45993776619434357, + "eval_compot_IoU": 0.5409588515758514, + "eval_compot_MAE_all": 0.09935053065419197, + "eval_compot_MAE_h": 0.06821273267269135, + "eval_compot_MAE_w": 0.131488636136055, + "eval_compot_MAE_x": 0.12996817380189896, + "eval_compot_MAE_y": 0.06773257814347744, + "eval_compot_NUM_probability": 0.9999727010726929, + "eval_compot_inside_bbox": 0.7083333432674408, + "eval_compot_loss": 0.3363632261753082, + "eval_compot_loss_ce": 0.038959529250860214, + "eval_compot_loss_iou": 0.500732421875, + "eval_compot_loss_num": 0.052478790283203125, + "eval_compot_loss_xval": 0.2623291015625, + "eval_compot_runtime": 94.6631, + "eval_compot_samples_per_second": 0.528, + "eval_compot_steps_per_second": 0.021, + "num_input_tokens_seen": 242463624, + "step": 2500 + }, + { + "epoch": 0.2444270629644114, + "loss": 0.29952487349510193, + "loss_ce": 0.03939303755760193, + "loss_iou": 0.5078125, + "loss_num": 0.052001953125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 242463624, + "step": 2500 + }, + { + "epoch": 0.2445248337895972, + "grad_norm": 8.804583855958066, + "learning_rate": 5e-05, + "loss": 0.0892, + "num_input_tokens_seen": 242561116, + "step": 2501 + }, + { + "epoch": 0.2445248337895972, + "loss": 0.057464439421892166, + "loss_ce": 0.0038526817224919796, + "loss_iou": 0.373046875, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 242561116, + "step": 2501 + }, + { + "epoch": 0.24462260461478294, + "grad_norm": 12.73989843823426, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 242657608, + "step": 2502 + }, + { + "epoch": 0.24462260461478294, + "loss": 0.09437395632266998, + "loss_ce": 0.006391775794327259, + "loss_iou": 0.326171875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 242657608, + "step": 2502 + }, + { + "epoch": 0.24472037543996872, + "grad_norm": 9.117642460442502, + "learning_rate": 5e-05, + "loss": 0.0965, + "num_input_tokens_seen": 242754964, + "step": 2503 + }, + { + "epoch": 0.24472037543996872, + "loss": 0.07987861335277557, + "loss_ce": 0.002119830111041665, + "loss_iou": 0.2890625, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 242754964, + "step": 2503 + }, + { + "epoch": 0.24481814626515447, + "grad_norm": 9.422204732908337, + "learning_rate": 5e-05, + "loss": 0.1023, + "num_input_tokens_seen": 242851908, + "step": 2504 + }, + { + "epoch": 0.24481814626515447, + "loss": 0.09397809207439423, + "loss_ce": 0.0027610508259385824, + "loss_iou": 0.375, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 242851908, + "step": 2504 + }, + { + "epoch": 0.24491591709034025, + "grad_norm": 10.975836389187432, + "learning_rate": 5e-05, + "loss": 0.1272, + "num_input_tokens_seen": 242949024, + "step": 2505 + }, + { + "epoch": 0.24491591709034025, + "loss": 0.11897125840187073, + "loss_ce": 0.005064401775598526, + "loss_iou": 0.328125, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 242949024, + "step": 2505 + }, + { + "epoch": 0.245013687915526, + "grad_norm": 6.975386801720592, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 243045652, + "step": 2506 + }, + { + "epoch": 0.245013687915526, + "loss": 0.10633386671543121, + "loss_ce": 0.005488529801368713, + "loss_iou": 0.3203125, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 243045652, + "step": 2506 + }, + { + "epoch": 0.24511145874071177, + "grad_norm": 7.296982426120189, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 243142304, + "step": 2507 + }, + { + "epoch": 0.24511145874071177, + "loss": 0.08531145006418228, + "loss_ce": 0.0061793699860572815, + "loss_iou": 0.322265625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 243142304, + "step": 2507 + }, + { + "epoch": 0.24520922956589752, + "grad_norm": 11.400584864370838, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 243238476, + "step": 2508 + }, + { + "epoch": 0.24520922956589752, + "loss": 0.07665440440177917, + "loss_ce": 0.006418195087462664, + "loss_iou": 0.2265625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 243238476, + "step": 2508 + }, + { + "epoch": 0.2453070003910833, + "grad_norm": 4.27599647542316, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 243334844, + "step": 2509 + }, + { + "epoch": 0.2453070003910833, + "loss": 0.07316942512989044, + "loss_ce": 0.008075431920588017, + "loss_iou": 0.45703125, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 243334844, + "step": 2509 + }, + { + "epoch": 0.24540477121626905, + "grad_norm": 5.267318667995631, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 243431960, + "step": 2510 + }, + { + "epoch": 0.24540477121626905, + "loss": 0.07059654593467712, + "loss_ce": 0.0016420776955783367, + "loss_iou": 0.328125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 243431960, + "step": 2510 + }, + { + "epoch": 0.24550254204145483, + "grad_norm": 17.969888981494506, + "learning_rate": 5e-05, + "loss": 0.1143, + "num_input_tokens_seen": 243529832, + "step": 2511 + }, + { + "epoch": 0.24550254204145483, + "loss": 0.13063740730285645, + "loss_ce": 0.006888620555400848, + "loss_iou": 0.447265625, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 243529832, + "step": 2511 + }, + { + "epoch": 0.2456003128666406, + "grad_norm": 47.03432528837811, + "learning_rate": 5e-05, + "loss": 0.1243, + "num_input_tokens_seen": 243627164, + "step": 2512 + }, + { + "epoch": 0.2456003128666406, + "loss": 0.0985831469297409, + "loss_ce": 0.0034903674386441708, + "loss_iou": 0.337890625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 243627164, + "step": 2512 + }, + { + "epoch": 0.24569808369182636, + "grad_norm": 17.476840198064576, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 243723368, + "step": 2513 + }, + { + "epoch": 0.24569808369182636, + "loss": 0.08767295628786087, + "loss_ce": 0.006831887643784285, + "loss_iou": 0.271484375, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 243723368, + "step": 2513 + }, + { + "epoch": 0.24579585451701214, + "grad_norm": 5.2129261849565385, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 243819720, + "step": 2514 + }, + { + "epoch": 0.24579585451701214, + "loss": 0.10186967253684998, + "loss_ce": 0.00633439002558589, + "loss_iou": 0.2734375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 243819720, + "step": 2514 + }, + { + "epoch": 0.24589362534219789, + "grad_norm": 10.682017324136226, + "learning_rate": 5e-05, + "loss": 0.0982, + "num_input_tokens_seen": 243917124, + "step": 2515 + }, + { + "epoch": 0.24589362534219789, + "loss": 0.07714027166366577, + "loss_ce": 0.0059046149253845215, + "loss_iou": 0.294921875, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 243917124, + "step": 2515 + }, + { + "epoch": 0.24599139616738366, + "grad_norm": 12.100256093419928, + "learning_rate": 5e-05, + "loss": 0.1103, + "num_input_tokens_seen": 244014688, + "step": 2516 + }, + { + "epoch": 0.24599139616738366, + "loss": 0.15926595032215118, + "loss_ce": 0.010218098759651184, + "loss_iou": 0.33203125, + "loss_num": 0.02978515625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 244014688, + "step": 2516 + }, + { + "epoch": 0.2460891669925694, + "grad_norm": 3.9104668933839997, + "learning_rate": 5e-05, + "loss": 0.1029, + "num_input_tokens_seen": 244112092, + "step": 2517 + }, + { + "epoch": 0.2460891669925694, + "loss": 0.11885984987020493, + "loss_ce": 0.005700675304979086, + "loss_iou": 0.466796875, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 244112092, + "step": 2517 + }, + { + "epoch": 0.2461869378177552, + "grad_norm": 6.417957813021701, + "learning_rate": 5e-05, + "loss": 0.1062, + "num_input_tokens_seen": 244209812, + "step": 2518 + }, + { + "epoch": 0.2461869378177552, + "loss": 0.142429918050766, + "loss_ce": 0.0044294181279838085, + "loss_iou": 0.33984375, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 244209812, + "step": 2518 + }, + { + "epoch": 0.24628470864294094, + "grad_norm": 10.803445120412764, + "learning_rate": 5e-05, + "loss": 0.0997, + "num_input_tokens_seen": 244306616, + "step": 2519 + }, + { + "epoch": 0.24628470864294094, + "loss": 0.08876839280128479, + "loss_ce": 0.001121904468163848, + "loss_iou": 0.28515625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 244306616, + "step": 2519 + }, + { + "epoch": 0.24638247946812672, + "grad_norm": 12.477110390317284, + "learning_rate": 5e-05, + "loss": 0.0928, + "num_input_tokens_seen": 244403560, + "step": 2520 + }, + { + "epoch": 0.24638247946812672, + "loss": 0.09157511591911316, + "loss_ce": 0.006308998912572861, + "loss_iou": 0.318359375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 244403560, + "step": 2520 + }, + { + "epoch": 0.24648025029331247, + "grad_norm": 11.343670348847512, + "learning_rate": 5e-05, + "loss": 0.1078, + "num_input_tokens_seen": 244500020, + "step": 2521 + }, + { + "epoch": 0.24648025029331247, + "loss": 0.12990254163742065, + "loss_ce": 0.0028273537755012512, + "loss_iou": 0.45703125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 244500020, + "step": 2521 + }, + { + "epoch": 0.24657802111849825, + "grad_norm": 15.49431048873777, + "learning_rate": 5e-05, + "loss": 0.1295, + "num_input_tokens_seen": 244597212, + "step": 2522 + }, + { + "epoch": 0.24657802111849825, + "loss": 0.09457765519618988, + "loss_ce": 0.01085268147289753, + "loss_iou": 0.416015625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 244597212, + "step": 2522 + }, + { + "epoch": 0.246675791943684, + "grad_norm": 8.93196615061949, + "learning_rate": 5e-05, + "loss": 0.1097, + "num_input_tokens_seen": 244694456, + "step": 2523 + }, + { + "epoch": 0.246675791943684, + "loss": 0.10875323414802551, + "loss_ce": 0.0044899312779307365, + "loss_iou": 0.302734375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 244694456, + "step": 2523 + }, + { + "epoch": 0.24677356276886978, + "grad_norm": 3.127627952816637, + "learning_rate": 5e-05, + "loss": 0.1052, + "num_input_tokens_seen": 244791664, + "step": 2524 + }, + { + "epoch": 0.24677356276886978, + "loss": 0.09364352375268936, + "loss_ce": 0.005600309930741787, + "loss_iou": 0.275390625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 244791664, + "step": 2524 + }, + { + "epoch": 0.24687133359405553, + "grad_norm": 13.59236834248337, + "learning_rate": 5e-05, + "loss": 0.0933, + "num_input_tokens_seen": 244889596, + "step": 2525 + }, + { + "epoch": 0.24687133359405553, + "loss": 0.10803750157356262, + "loss_ce": 0.00523904524743557, + "loss_iou": 0.322265625, + "loss_num": 0.0205078125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 244889596, + "step": 2525 + }, + { + "epoch": 0.2469691044192413, + "grad_norm": 4.615467312372097, + "learning_rate": 5e-05, + "loss": 0.0769, + "num_input_tokens_seen": 244986304, + "step": 2526 + }, + { + "epoch": 0.2469691044192413, + "loss": 0.08520105481147766, + "loss_ce": 0.006587779149413109, + "loss_iou": 0.205078125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 244986304, + "step": 2526 + }, + { + "epoch": 0.24706687524442705, + "grad_norm": 13.271510466232304, + "learning_rate": 5e-05, + "loss": 0.1031, + "num_input_tokens_seen": 245083668, + "step": 2527 + }, + { + "epoch": 0.24706687524442705, + "loss": 0.13585297763347626, + "loss_ce": 0.002247018739581108, + "loss_iou": 0.380859375, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 245083668, + "step": 2527 + }, + { + "epoch": 0.24716464606961283, + "grad_norm": 12.982751138215589, + "learning_rate": 5e-05, + "loss": 0.0956, + "num_input_tokens_seen": 245180504, + "step": 2528 + }, + { + "epoch": 0.24716464606961283, + "loss": 0.09696591645479202, + "loss_ce": 0.005642067641019821, + "loss_iou": 0.302734375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 245180504, + "step": 2528 + }, + { + "epoch": 0.24726241689479858, + "grad_norm": 3.3453414001314865, + "learning_rate": 5e-05, + "loss": 0.092, + "num_input_tokens_seen": 245277172, + "step": 2529 + }, + { + "epoch": 0.24726241689479858, + "loss": 0.12518513202667236, + "loss_ce": 0.004224895965307951, + "loss_iou": 0.28125, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 245277172, + "step": 2529 + }, + { + "epoch": 0.24736018771998436, + "grad_norm": 19.896245892483375, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 245372928, + "step": 2530 + }, + { + "epoch": 0.24736018771998436, + "loss": 0.12368416041135788, + "loss_ce": 0.004818197339773178, + "loss_iou": 0.38671875, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 245372928, + "step": 2530 + }, + { + "epoch": 0.2474579585451701, + "grad_norm": 3.365488277929309, + "learning_rate": 5e-05, + "loss": 0.0843, + "num_input_tokens_seen": 245469136, + "step": 2531 + }, + { + "epoch": 0.2474579585451701, + "loss": 0.06553203612565994, + "loss_ce": 0.006007498130202293, + "loss_iou": 0.34375, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 245469136, + "step": 2531 + }, + { + "epoch": 0.2475557293703559, + "grad_norm": 16.012526162916775, + "learning_rate": 5e-05, + "loss": 0.1199, + "num_input_tokens_seen": 245566872, + "step": 2532 + }, + { + "epoch": 0.2475557293703559, + "loss": 0.11527001112699509, + "loss_ce": 0.007588737644255161, + "loss_iou": 0.318359375, + "loss_num": 0.021484375, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 245566872, + "step": 2532 + }, + { + "epoch": 0.24765350019554164, + "grad_norm": 4.343068314270537, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 245664460, + "step": 2533 + }, + { + "epoch": 0.24765350019554164, + "loss": 0.08500003814697266, + "loss_ce": 0.0031671500764787197, + "loss_iou": 0.42578125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 245664460, + "step": 2533 + }, + { + "epoch": 0.24775127102072741, + "grad_norm": 4.833955254297367, + "learning_rate": 5e-05, + "loss": 0.1108, + "num_input_tokens_seen": 245761064, + "step": 2534 + }, + { + "epoch": 0.24775127102072741, + "loss": 0.12584662437438965, + "loss_ce": 0.013358830474317074, + "loss_iou": 0.234375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 245761064, + "step": 2534 + }, + { + "epoch": 0.2478490418459132, + "grad_norm": 2.6881153526012715, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 245857716, + "step": 2535 + }, + { + "epoch": 0.2478490418459132, + "loss": 0.08457772433757782, + "loss_ce": 0.010374228470027447, + "loss_iou": 0.3046875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 245857716, + "step": 2535 + }, + { + "epoch": 0.24794681267109894, + "grad_norm": 3.062186747084401, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 245954364, + "step": 2536 + }, + { + "epoch": 0.24794681267109894, + "loss": 0.09863868355751038, + "loss_ce": 0.0037900423631072044, + "loss_iou": 0.37890625, + "loss_num": 0.01904296875, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 245954364, + "step": 2536 + }, + { + "epoch": 0.24804458349628472, + "grad_norm": 3.147728788206157, + "learning_rate": 5e-05, + "loss": 0.1148, + "num_input_tokens_seen": 246051260, + "step": 2537 + }, + { + "epoch": 0.24804458349628472, + "loss": 0.06518770754337311, + "loss_ce": 0.004610314033925533, + "loss_iou": 0.39453125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 246051260, + "step": 2537 + }, + { + "epoch": 0.24814235432147047, + "grad_norm": 8.189158435543334, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 246148012, + "step": 2538 + }, + { + "epoch": 0.24814235432147047, + "loss": 0.08586849272251129, + "loss_ce": 0.005393638741225004, + "loss_iou": 0.1962890625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 246148012, + "step": 2538 + }, + { + "epoch": 0.24824012514665625, + "grad_norm": 4.668703748822134, + "learning_rate": 5e-05, + "loss": 0.1191, + "num_input_tokens_seen": 246245052, + "step": 2539 + }, + { + "epoch": 0.24824012514665625, + "loss": 0.10976120829582214, + "loss_ce": 0.007542584091424942, + "loss_iou": 0.330078125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 246245052, + "step": 2539 + }, + { + "epoch": 0.248337895971842, + "grad_norm": 5.563829683379341, + "learning_rate": 5e-05, + "loss": 0.0838, + "num_input_tokens_seen": 246342504, + "step": 2540 + }, + { + "epoch": 0.248337895971842, + "loss": 0.05462632328271866, + "loss_ce": 0.0030973905231803656, + "loss_iou": 0.39453125, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 246342504, + "step": 2540 + }, + { + "epoch": 0.24843566679702778, + "grad_norm": 5.100646885513476, + "learning_rate": 5e-05, + "loss": 0.1158, + "num_input_tokens_seen": 246439392, + "step": 2541 + }, + { + "epoch": 0.24843566679702778, + "loss": 0.1032707542181015, + "loss_ce": 0.005004155449569225, + "loss_iou": 0.337890625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 246439392, + "step": 2541 + }, + { + "epoch": 0.24853343762221353, + "grad_norm": 4.862836401997486, + "learning_rate": 5e-05, + "loss": 0.0499, + "num_input_tokens_seen": 246535712, + "step": 2542 + }, + { + "epoch": 0.24853343762221353, + "loss": 0.05229312926530838, + "loss_ce": 0.004563638474792242, + "loss_iou": 0.240234375, + "loss_num": 0.009521484375, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 246535712, + "step": 2542 + }, + { + "epoch": 0.2486312084473993, + "grad_norm": 10.236221197469318, + "learning_rate": 5e-05, + "loss": 0.1113, + "num_input_tokens_seen": 246633592, + "step": 2543 + }, + { + "epoch": 0.2486312084473993, + "loss": 0.1267463117837906, + "loss_ce": 0.004553928971290588, + "loss_iou": 0.337890625, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 246633592, + "step": 2543 + }, + { + "epoch": 0.24872897927258505, + "grad_norm": 11.489922778574654, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 246730344, + "step": 2544 + }, + { + "epoch": 0.24872897927258505, + "loss": 0.07595562934875488, + "loss_ce": 0.005704160779714584, + "loss_iou": 0.369140625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 246730344, + "step": 2544 + }, + { + "epoch": 0.24882675009777083, + "grad_norm": 13.749796251950125, + "learning_rate": 5e-05, + "loss": 0.1042, + "num_input_tokens_seen": 246827400, + "step": 2545 + }, + { + "epoch": 0.24882675009777083, + "loss": 0.13498303294181824, + "loss_ce": 0.006458258256316185, + "loss_iou": 0.296875, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 246827400, + "step": 2545 + }, + { + "epoch": 0.24892452092295658, + "grad_norm": 13.661747323427047, + "learning_rate": 5e-05, + "loss": 0.1095, + "num_input_tokens_seen": 246924664, + "step": 2546 + }, + { + "epoch": 0.24892452092295658, + "loss": 0.09112095832824707, + "loss_ce": 0.00689244270324707, + "loss_iou": 0.27734375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 246924664, + "step": 2546 + }, + { + "epoch": 0.24902229174814236, + "grad_norm": 12.779298281800942, + "learning_rate": 5e-05, + "loss": 0.0974, + "num_input_tokens_seen": 247022248, + "step": 2547 + }, + { + "epoch": 0.24902229174814236, + "loss": 0.10007315874099731, + "loss_ce": 0.006201089359819889, + "loss_iou": 0.431640625, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 247022248, + "step": 2547 + }, + { + "epoch": 0.2491200625733281, + "grad_norm": 4.559861635540057, + "learning_rate": 5e-05, + "loss": 0.1213, + "num_input_tokens_seen": 247120220, + "step": 2548 + }, + { + "epoch": 0.2491200625733281, + "loss": 0.1388847380876541, + "loss_ce": 0.0054008569568395615, + "loss_iou": 0.421875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 247120220, + "step": 2548 + }, + { + "epoch": 0.2492178333985139, + "grad_norm": 4.683776613936348, + "learning_rate": 5e-05, + "loss": 0.1068, + "num_input_tokens_seen": 247217836, + "step": 2549 + }, + { + "epoch": 0.2492178333985139, + "loss": 0.13597337901592255, + "loss_ce": 0.004839346278458834, + "loss_iou": 0.376953125, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 247217836, + "step": 2549 + }, + { + "epoch": 0.24931560422369964, + "grad_norm": 11.182230410560921, + "learning_rate": 5e-05, + "loss": 0.1118, + "num_input_tokens_seen": 247314812, + "step": 2550 + }, + { + "epoch": 0.24931560422369964, + "loss": 0.07191438972949982, + "loss_ce": 0.003799156751483679, + "loss_iou": 0.31640625, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 247314812, + "step": 2550 + }, + { + "epoch": 0.24941337504888542, + "grad_norm": 13.969533027621651, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 247412020, + "step": 2551 + }, + { + "epoch": 0.24941337504888542, + "loss": 0.10881113260984421, + "loss_ce": 0.008469337597489357, + "loss_iou": 0.326171875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 247412020, + "step": 2551 + }, + { + "epoch": 0.24951114587407117, + "grad_norm": 4.882474299043782, + "learning_rate": 5e-05, + "loss": 0.1076, + "num_input_tokens_seen": 247509836, + "step": 2552 + }, + { + "epoch": 0.24951114587407117, + "loss": 0.10947667062282562, + "loss_ce": 0.006098373793065548, + "loss_iou": 0.35546875, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 247509836, + "step": 2552 + }, + { + "epoch": 0.24960891669925694, + "grad_norm": 9.159881708170065, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 247606944, + "step": 2553 + }, + { + "epoch": 0.24960891669925694, + "loss": 0.07805541157722473, + "loss_ce": 0.006667172536253929, + "loss_iou": 0.271484375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 247606944, + "step": 2553 + }, + { + "epoch": 0.2497066875244427, + "grad_norm": 10.898662828767938, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 247703240, + "step": 2554 + }, + { + "epoch": 0.2497066875244427, + "loss": 0.07755225151777267, + "loss_ce": 0.0057291314005851746, + "loss_iou": 0.265625, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 247703240, + "step": 2554 + }, + { + "epoch": 0.24980445834962847, + "grad_norm": 7.630638824807499, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 247799180, + "step": 2555 + }, + { + "epoch": 0.24980445834962847, + "loss": 0.07226385921239853, + "loss_ce": 0.0031110283453017473, + "loss_iou": 0.353515625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 247799180, + "step": 2555 + }, + { + "epoch": 0.24990222917481422, + "grad_norm": 4.929961531746193, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 247896336, + "step": 2556 + }, + { + "epoch": 0.24990222917481422, + "loss": 0.06309600174427032, + "loss_ce": 0.002892447868362069, + "loss_iou": 0.361328125, + "loss_num": 0.01202392578125, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 247896336, + "step": 2556 + }, + { + "epoch": 0.25, + "grad_norm": 5.311312309541315, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 247993380, + "step": 2557 + }, + { + "epoch": 0.25, + "loss": 0.10771098732948303, + "loss_ce": 0.0047294143587350845, + "loss_iou": 0.36328125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 247993380, + "step": 2557 + }, + { + "epoch": 0.25009777082518575, + "grad_norm": 40.89834727100983, + "learning_rate": 5e-05, + "loss": 0.1206, + "num_input_tokens_seen": 248090680, + "step": 2558 + }, + { + "epoch": 0.25009777082518575, + "loss": 0.1498548984527588, + "loss_ce": 0.008375401608645916, + "loss_iou": 0.333984375, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 248090680, + "step": 2558 + }, + { + "epoch": 0.25019554165037156, + "grad_norm": 6.810277659374034, + "learning_rate": 5e-05, + "loss": 0.1041, + "num_input_tokens_seen": 248188204, + "step": 2559 + }, + { + "epoch": 0.25019554165037156, + "loss": 0.13239456713199615, + "loss_ce": 0.006021272391080856, + "loss_iou": 0.44921875, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 248188204, + "step": 2559 + }, + { + "epoch": 0.2502933124755573, + "grad_norm": 25.924934062608674, + "learning_rate": 5e-05, + "loss": 0.0958, + "num_input_tokens_seen": 248285276, + "step": 2560 + }, + { + "epoch": 0.2502933124755573, + "loss": 0.10491888225078583, + "loss_ce": 0.00405828608199954, + "loss_iou": 0.357421875, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 248285276, + "step": 2560 + }, + { + "epoch": 0.25039108330074306, + "grad_norm": 4.563336392933567, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 248381964, + "step": 2561 + }, + { + "epoch": 0.25039108330074306, + "loss": 0.0879025086760521, + "loss_ce": 0.005306680221110582, + "loss_iou": 0.43359375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 248381964, + "step": 2561 + }, + { + "epoch": 0.2504888541259288, + "grad_norm": 7.015264098738258, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 248479380, + "step": 2562 + }, + { + "epoch": 0.2504888541259288, + "loss": 0.053448017686605453, + "loss_ce": 0.004085836000740528, + "loss_iou": 0.3203125, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 248479380, + "step": 2562 + }, + { + "epoch": 0.2505866249511146, + "grad_norm": 15.364256364289437, + "learning_rate": 5e-05, + "loss": 0.1189, + "num_input_tokens_seen": 248576580, + "step": 2563 + }, + { + "epoch": 0.2505866249511146, + "loss": 0.14903023838996887, + "loss_ce": 0.008267899975180626, + "loss_iou": 0.322265625, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 248576580, + "step": 2563 + }, + { + "epoch": 0.25068439577630036, + "grad_norm": 12.232780872427126, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 248673100, + "step": 2564 + }, + { + "epoch": 0.25068439577630036, + "loss": 0.11846328526735306, + "loss_ce": 0.008386386558413506, + "loss_iou": 0.306640625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 248673100, + "step": 2564 + }, + { + "epoch": 0.2507821666014861, + "grad_norm": 14.222808115262795, + "learning_rate": 5e-05, + "loss": 0.1136, + "num_input_tokens_seen": 248771428, + "step": 2565 + }, + { + "epoch": 0.2507821666014861, + "loss": 0.1446963995695114, + "loss_ce": 0.005475198850035667, + "loss_iou": 0.349609375, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 248771428, + "step": 2565 + }, + { + "epoch": 0.25087993742667186, + "grad_norm": 12.339735739045588, + "learning_rate": 5e-05, + "loss": 0.1104, + "num_input_tokens_seen": 248869196, + "step": 2566 + }, + { + "epoch": 0.25087993742667186, + "loss": 0.10172140598297119, + "loss_ce": 0.007910363376140594, + "loss_iou": 0.388671875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 248869196, + "step": 2566 + }, + { + "epoch": 0.25097770825185767, + "grad_norm": 12.600379481232714, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 248966064, + "step": 2567 + }, + { + "epoch": 0.25097770825185767, + "loss": 0.08144479990005493, + "loss_ce": 0.0044947294518351555, + "loss_iou": 0.36328125, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 248966064, + "step": 2567 + }, + { + "epoch": 0.2510754790770434, + "grad_norm": 16.112391300405044, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 249062912, + "step": 2568 + }, + { + "epoch": 0.2510754790770434, + "loss": 0.0809018462896347, + "loss_ce": 0.0031735810916870832, + "loss_iou": 0.30859375, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 249062912, + "step": 2568 + }, + { + "epoch": 0.25117324990222917, + "grad_norm": 19.61094989671693, + "learning_rate": 5e-05, + "loss": 0.092, + "num_input_tokens_seen": 249159852, + "step": 2569 + }, + { + "epoch": 0.25117324990222917, + "loss": 0.12321285903453827, + "loss_ce": 0.008207375183701515, + "loss_iou": 0.33203125, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 249159852, + "step": 2569 + }, + { + "epoch": 0.2512710207274149, + "grad_norm": 6.9999924227555335, + "learning_rate": 5e-05, + "loss": 0.13, + "num_input_tokens_seen": 249256832, + "step": 2570 + }, + { + "epoch": 0.2512710207274149, + "loss": 0.09969986230134964, + "loss_ce": 0.0060719335451722145, + "loss_iou": 0.3203125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 249256832, + "step": 2570 + }, + { + "epoch": 0.2513687915526007, + "grad_norm": 3.328203385688803, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 249354084, + "step": 2571 + }, + { + "epoch": 0.2513687915526007, + "loss": 0.06710001826286316, + "loss_ce": 0.0038675940595567226, + "loss_iou": 0.341796875, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 249354084, + "step": 2571 + }, + { + "epoch": 0.2514665623777865, + "grad_norm": 2.9272422752258884, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 249450852, + "step": 2572 + }, + { + "epoch": 0.2514665623777865, + "loss": 0.06219790130853653, + "loss_ce": 0.005175806116312742, + "loss_iou": 0.27734375, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 249450852, + "step": 2572 + }, + { + "epoch": 0.2515643332029722, + "grad_norm": 2.952698519323386, + "learning_rate": 5e-05, + "loss": 0.0971, + "num_input_tokens_seen": 249547508, + "step": 2573 + }, + { + "epoch": 0.2515643332029722, + "loss": 0.08816458284854889, + "loss_ce": 0.003966582473367453, + "loss_iou": 0.294921875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 249547508, + "step": 2573 + }, + { + "epoch": 0.251662104028158, + "grad_norm": 9.514380897320622, + "learning_rate": 5e-05, + "loss": 0.0834, + "num_input_tokens_seen": 249644188, + "step": 2574 + }, + { + "epoch": 0.251662104028158, + "loss": 0.08365633338689804, + "loss_ce": 0.006233238615095615, + "loss_iou": 0.333984375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 249644188, + "step": 2574 + }, + { + "epoch": 0.2517598748533438, + "grad_norm": 9.309509135101592, + "learning_rate": 5e-05, + "loss": 0.1083, + "num_input_tokens_seen": 249741764, + "step": 2575 + }, + { + "epoch": 0.2517598748533438, + "loss": 0.11777915060520172, + "loss_ce": 0.004223247990012169, + "loss_iou": 0.353515625, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 249741764, + "step": 2575 + }, + { + "epoch": 0.25185764567852953, + "grad_norm": 23.993867049915966, + "learning_rate": 5e-05, + "loss": 0.0529, + "num_input_tokens_seen": 249838556, + "step": 2576 + }, + { + "epoch": 0.25185764567852953, + "loss": 0.03410065546631813, + "loss_ce": 0.0039035126101225615, + "loss_iou": 0.283203125, + "loss_num": 0.00604248046875, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 249838556, + "step": 2576 + }, + { + "epoch": 0.2519554165037153, + "grad_norm": 6.94521710333265, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 249936172, + "step": 2577 + }, + { + "epoch": 0.2519554165037153, + "loss": 0.07381050288677216, + "loss_ce": 0.004726329818367958, + "loss_iou": 0.388671875, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 249936172, + "step": 2577 + }, + { + "epoch": 0.25205318732890103, + "grad_norm": 4.633092307106733, + "learning_rate": 5e-05, + "loss": 0.1035, + "num_input_tokens_seen": 250032324, + "step": 2578 + }, + { + "epoch": 0.25205318732890103, + "loss": 0.10188150405883789, + "loss_ce": 0.0024857523385435343, + "loss_iou": 0.31640625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 250032324, + "step": 2578 + }, + { + "epoch": 0.25215095815408683, + "grad_norm": 5.65229262530144, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 250129460, + "step": 2579 + }, + { + "epoch": 0.25215095815408683, + "loss": 0.07319919764995575, + "loss_ce": 0.004458353854715824, + "loss_iou": 0.3359375, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 250129460, + "step": 2579 + }, + { + "epoch": 0.2522487289792726, + "grad_norm": 8.86318684447065, + "learning_rate": 5e-05, + "loss": 0.075, + "num_input_tokens_seen": 250226436, + "step": 2580 + }, + { + "epoch": 0.2522487289792726, + "loss": 0.08163784444332123, + "loss_ce": 0.002643089508637786, + "loss_iou": 0.31640625, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 250226436, + "step": 2580 + }, + { + "epoch": 0.25234649980445834, + "grad_norm": 16.931729218635425, + "learning_rate": 5e-05, + "loss": 0.1281, + "num_input_tokens_seen": 250323840, + "step": 2581 + }, + { + "epoch": 0.25234649980445834, + "loss": 0.11344674974679947, + "loss_ce": 0.004315888974815607, + "loss_iou": 0.306640625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 250323840, + "step": 2581 + }, + { + "epoch": 0.25244427062964414, + "grad_norm": 3.4264788157663153, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 250421164, + "step": 2582 + }, + { + "epoch": 0.25244427062964414, + "loss": 0.08708082139492035, + "loss_ce": 0.008101330138742924, + "loss_iou": 0.4140625, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 250421164, + "step": 2582 + }, + { + "epoch": 0.2525420414548299, + "grad_norm": 2.560062277849411, + "learning_rate": 5e-05, + "loss": 0.0589, + "num_input_tokens_seen": 250518176, + "step": 2583 + }, + { + "epoch": 0.2525420414548299, + "loss": 0.07650861144065857, + "loss_ce": 0.004304026253521442, + "loss_iou": 0.36328125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 250518176, + "step": 2583 + }, + { + "epoch": 0.25263981228001564, + "grad_norm": 4.592292545195852, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 250615352, + "step": 2584 + }, + { + "epoch": 0.25263981228001564, + "loss": 0.05307133123278618, + "loss_ce": 0.005219769198447466, + "loss_iou": 0.384765625, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 250615352, + "step": 2584 + }, + { + "epoch": 0.2527375831052014, + "grad_norm": 7.644658516458953, + "learning_rate": 5e-05, + "loss": 0.0726, + "num_input_tokens_seen": 250712104, + "step": 2585 + }, + { + "epoch": 0.2527375831052014, + "loss": 0.07797948271036148, + "loss_ce": 0.0028299479745328426, + "loss_iou": 0.3203125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 250712104, + "step": 2585 + }, + { + "epoch": 0.2528353539303872, + "grad_norm": 11.07803615660536, + "learning_rate": 5e-05, + "loss": 0.1116, + "num_input_tokens_seen": 250809524, + "step": 2586 + }, + { + "epoch": 0.2528353539303872, + "loss": 0.05587419122457504, + "loss_ce": 0.004741990007460117, + "loss_iou": 0.2890625, + "loss_num": 0.01025390625, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 250809524, + "step": 2586 + }, + { + "epoch": 0.25293312475557295, + "grad_norm": 3.424955122555866, + "learning_rate": 5e-05, + "loss": 0.0879, + "num_input_tokens_seen": 250905096, + "step": 2587 + }, + { + "epoch": 0.25293312475557295, + "loss": 0.07257891446352005, + "loss_ce": 0.00946856290102005, + "loss_iou": 0.2109375, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 250905096, + "step": 2587 + }, + { + "epoch": 0.2530308955807587, + "grad_norm": 2.6064551128168576, + "learning_rate": 5e-05, + "loss": 0.1206, + "num_input_tokens_seen": 251001408, + "step": 2588 + }, + { + "epoch": 0.2530308955807587, + "loss": 0.15002113580703735, + "loss_ce": 0.0062070488929748535, + "loss_iou": 0.2119140625, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 251001408, + "step": 2588 + }, + { + "epoch": 0.25312866640594445, + "grad_norm": 2.3473949170856434, + "learning_rate": 5e-05, + "loss": 0.0949, + "num_input_tokens_seen": 251096968, + "step": 2589 + }, + { + "epoch": 0.25312866640594445, + "loss": 0.0798611044883728, + "loss_ce": 0.006443444173783064, + "loss_iou": 0.283203125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 251096968, + "step": 2589 + }, + { + "epoch": 0.25322643723113025, + "grad_norm": 3.360451964111942, + "learning_rate": 5e-05, + "loss": 0.1002, + "num_input_tokens_seen": 251194496, + "step": 2590 + }, + { + "epoch": 0.25322643723113025, + "loss": 0.12734879553318024, + "loss_ce": 0.007872480899095535, + "loss_iou": 0.302734375, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 251194496, + "step": 2590 + }, + { + "epoch": 0.253324208056316, + "grad_norm": 4.6545996710368565, + "learning_rate": 5e-05, + "loss": 0.0626, + "num_input_tokens_seen": 251292016, + "step": 2591 + }, + { + "epoch": 0.253324208056316, + "loss": 0.07044461369514465, + "loss_ce": 0.004465608857572079, + "loss_iou": 0.3515625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 251292016, + "step": 2591 + }, + { + "epoch": 0.25342197888150175, + "grad_norm": 7.089230386889951, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 251389024, + "step": 2592 + }, + { + "epoch": 0.25342197888150175, + "loss": 0.07754029333591461, + "loss_ce": 0.006251233164221048, + "loss_iou": 0.328125, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 251389024, + "step": 2592 + }, + { + "epoch": 0.2535197497066875, + "grad_norm": 11.4800104400523, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 251485548, + "step": 2593 + }, + { + "epoch": 0.2535197497066875, + "loss": 0.09782195091247559, + "loss_ce": 0.005414720159024, + "loss_iou": 0.419921875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 251485548, + "step": 2593 + }, + { + "epoch": 0.2536175205318733, + "grad_norm": 11.863202130799996, + "learning_rate": 5e-05, + "loss": 0.1017, + "num_input_tokens_seen": 251582380, + "step": 2594 + }, + { + "epoch": 0.2536175205318733, + "loss": 0.12070104479789734, + "loss_ce": 0.01225683931261301, + "loss_iou": 0.275390625, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 251582380, + "step": 2594 + }, + { + "epoch": 0.25371529135705906, + "grad_norm": 5.625453869858867, + "learning_rate": 5e-05, + "loss": 0.0961, + "num_input_tokens_seen": 251679356, + "step": 2595 + }, + { + "epoch": 0.25371529135705906, + "loss": 0.1018536388874054, + "loss_ce": 0.006119989790022373, + "loss_iou": 0.32421875, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 251679356, + "step": 2595 + }, + { + "epoch": 0.2538130621822448, + "grad_norm": 2.2027344866438368, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 251775364, + "step": 2596 + }, + { + "epoch": 0.2538130621822448, + "loss": 0.06634389609098434, + "loss_ce": 0.006620996631681919, + "loss_iou": 0.203125, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 251775364, + "step": 2596 + }, + { + "epoch": 0.25391083300743056, + "grad_norm": 7.308873599041735, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 251871568, + "step": 2597 + }, + { + "epoch": 0.25391083300743056, + "loss": 0.07603459805250168, + "loss_ce": 0.00779729476198554, + "loss_iou": 0.28125, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 251871568, + "step": 2597 + }, + { + "epoch": 0.25400860383261636, + "grad_norm": 8.052801970790966, + "learning_rate": 5e-05, + "loss": 0.0956, + "num_input_tokens_seen": 251968876, + "step": 2598 + }, + { + "epoch": 0.25400860383261636, + "loss": 0.08920365571975708, + "loss_ce": 0.004425829276442528, + "loss_iou": 0.369140625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 251968876, + "step": 2598 + }, + { + "epoch": 0.2541063746578021, + "grad_norm": 5.408304628622146, + "learning_rate": 5e-05, + "loss": 0.1096, + "num_input_tokens_seen": 252067128, + "step": 2599 + }, + { + "epoch": 0.2541063746578021, + "loss": 0.12465103715658188, + "loss_ce": 0.0040760827250778675, + "loss_iou": 0.49609375, + "loss_num": 0.0240478515625, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 252067128, + "step": 2599 + }, + { + "epoch": 0.25420414548298786, + "grad_norm": 4.511787096812498, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 252163884, + "step": 2600 + }, + { + "epoch": 0.25420414548298786, + "loss": 0.1218547374010086, + "loss_ce": 0.00527759175747633, + "loss_iou": 0.376953125, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 252163884, + "step": 2600 + }, + { + "epoch": 0.2543019163081736, + "grad_norm": 5.779801178491289, + "learning_rate": 5e-05, + "loss": 0.1104, + "num_input_tokens_seen": 252261008, + "step": 2601 + }, + { + "epoch": 0.2543019163081736, + "loss": 0.13071435689926147, + "loss_ce": 0.005836424417793751, + "loss_iou": 0.333984375, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 252261008, + "step": 2601 + }, + { + "epoch": 0.2543996871333594, + "grad_norm": 3.9500021155119414, + "learning_rate": 5e-05, + "loss": 0.113, + "num_input_tokens_seen": 252358096, + "step": 2602 + }, + { + "epoch": 0.2543996871333594, + "loss": 0.11417719721794128, + "loss_ce": 0.006267042830586433, + "loss_iou": 0.33984375, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 252358096, + "step": 2602 + }, + { + "epoch": 0.25449745795854517, + "grad_norm": 4.694095106009654, + "learning_rate": 5e-05, + "loss": 0.0848, + "num_input_tokens_seen": 252454580, + "step": 2603 + }, + { + "epoch": 0.25449745795854517, + "loss": 0.08059564232826233, + "loss_ce": 0.0028673727065324783, + "loss_iou": 0.435546875, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 252454580, + "step": 2603 + }, + { + "epoch": 0.2545952287837309, + "grad_norm": 7.246585285471512, + "learning_rate": 5e-05, + "loss": 0.097, + "num_input_tokens_seen": 252551900, + "step": 2604 + }, + { + "epoch": 0.2545952287837309, + "loss": 0.0689559131860733, + "loss_ce": 0.0027480293065309525, + "loss_iou": 0.44140625, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 252551900, + "step": 2604 + }, + { + "epoch": 0.2546929996089167, + "grad_norm": 6.1046025938103226, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 252648836, + "step": 2605 + }, + { + "epoch": 0.2546929996089167, + "loss": 0.0920117124915123, + "loss_ce": 0.005769040901213884, + "loss_iou": 0.4765625, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 252648836, + "step": 2605 + }, + { + "epoch": 0.2547907704341025, + "grad_norm": 9.487036315802174, + "learning_rate": 5e-05, + "loss": 0.0995, + "num_input_tokens_seen": 252746536, + "step": 2606 + }, + { + "epoch": 0.2547907704341025, + "loss": 0.09995494782924652, + "loss_ce": 0.004709590692073107, + "loss_iou": 0.4296875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 252746536, + "step": 2606 + }, + { + "epoch": 0.2548885412592882, + "grad_norm": 5.897654236789132, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 252843308, + "step": 2607 + }, + { + "epoch": 0.2548885412592882, + "loss": 0.10621701180934906, + "loss_ce": 0.00566159188747406, + "loss_iou": 0.3671875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 252843308, + "step": 2607 + }, + { + "epoch": 0.254986312084474, + "grad_norm": 4.10878489368118, + "learning_rate": 5e-05, + "loss": 0.0854, + "num_input_tokens_seen": 252940596, + "step": 2608 + }, + { + "epoch": 0.254986312084474, + "loss": 0.10056126117706299, + "loss_ce": 0.002172593493014574, + "loss_iou": 0.390625, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 252940596, + "step": 2608 + }, + { + "epoch": 0.2550840829096598, + "grad_norm": 13.623551594837707, + "learning_rate": 5e-05, + "loss": 0.0932, + "num_input_tokens_seen": 253038112, + "step": 2609 + }, + { + "epoch": 0.2550840829096598, + "loss": 0.08842429518699646, + "loss_ce": 0.01007041521370411, + "loss_iou": 0.314453125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 253038112, + "step": 2609 + }, + { + "epoch": 0.25518185373484553, + "grad_norm": 17.881970667806044, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 253135364, + "step": 2610 + }, + { + "epoch": 0.25518185373484553, + "loss": 0.08901822566986084, + "loss_ce": 0.00834500789642334, + "loss_iou": 0.326171875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 253135364, + "step": 2610 + }, + { + "epoch": 0.2552796245600313, + "grad_norm": 6.274600433382897, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 253231196, + "step": 2611 + }, + { + "epoch": 0.2552796245600313, + "loss": 0.068811796605587, + "loss_ce": 0.004007718525826931, + "loss_iou": 0.408203125, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 253231196, + "step": 2611 + }, + { + "epoch": 0.25537739538521703, + "grad_norm": 9.626718006728742, + "learning_rate": 5e-05, + "loss": 0.0872, + "num_input_tokens_seen": 253327772, + "step": 2612 + }, + { + "epoch": 0.25537739538521703, + "loss": 0.07497909665107727, + "loss_ce": 0.0044148205779492855, + "loss_iou": 0.2197265625, + "loss_num": 0.01409912109375, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 253327772, + "step": 2612 + }, + { + "epoch": 0.25547516621040284, + "grad_norm": 7.6357652211900175, + "learning_rate": 5e-05, + "loss": 0.094, + "num_input_tokens_seen": 253425184, + "step": 2613 + }, + { + "epoch": 0.25547516621040284, + "loss": 0.11177895218133926, + "loss_ce": 0.0058219232596457005, + "loss_iou": 0.34375, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 253425184, + "step": 2613 + }, + { + "epoch": 0.2555729370355886, + "grad_norm": 8.290908000873177, + "learning_rate": 5e-05, + "loss": 0.1092, + "num_input_tokens_seen": 253522184, + "step": 2614 + }, + { + "epoch": 0.2555729370355886, + "loss": 0.12018188834190369, + "loss_ce": 0.011966563761234283, + "loss_iou": 0.45703125, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 253522184, + "step": 2614 + }, + { + "epoch": 0.25567070786077434, + "grad_norm": 18.559556719030375, + "learning_rate": 5e-05, + "loss": 0.0896, + "num_input_tokens_seen": 253618684, + "step": 2615 + }, + { + "epoch": 0.25567070786077434, + "loss": 0.08944161236286163, + "loss_ce": 0.009378744289278984, + "loss_iou": 0.28515625, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 253618684, + "step": 2615 + }, + { + "epoch": 0.2557684786859601, + "grad_norm": 58.88456839712725, + "learning_rate": 5e-05, + "loss": 0.1014, + "num_input_tokens_seen": 253715116, + "step": 2616 + }, + { + "epoch": 0.2557684786859601, + "loss": 0.10323873162269592, + "loss_ce": 0.007489832118153572, + "loss_iou": 0.3359375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 253715116, + "step": 2616 + }, + { + "epoch": 0.2558662495111459, + "grad_norm": 31.185617870225716, + "learning_rate": 5e-05, + "loss": 0.1709, + "num_input_tokens_seen": 253811832, + "step": 2617 + }, + { + "epoch": 0.2558662495111459, + "loss": 0.23619814217090607, + "loss_ce": 0.007621486205607653, + "loss_iou": 0.26953125, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 253811832, + "step": 2617 + }, + { + "epoch": 0.25596402033633164, + "grad_norm": 18.164176631226333, + "learning_rate": 5e-05, + "loss": 0.141, + "num_input_tokens_seen": 253908500, + "step": 2618 + }, + { + "epoch": 0.25596402033633164, + "loss": 0.10175138711929321, + "loss_ce": 0.0036831432953476906, + "loss_iou": 0.26953125, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 253908500, + "step": 2618 + }, + { + "epoch": 0.2560617911615174, + "grad_norm": 8.071667425089148, + "learning_rate": 5e-05, + "loss": 0.0703, + "num_input_tokens_seen": 254005772, + "step": 2619 + }, + { + "epoch": 0.2560617911615174, + "loss": 0.06056801974773407, + "loss_ce": 0.005941553972661495, + "loss_iou": 0.328125, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 254005772, + "step": 2619 + }, + { + "epoch": 0.25615956198670314, + "grad_norm": 4.81615369305614, + "learning_rate": 5e-05, + "loss": 0.1393, + "num_input_tokens_seen": 254101784, + "step": 2620 + }, + { + "epoch": 0.25615956198670314, + "loss": 0.12170301377773285, + "loss_ce": 0.005644666496664286, + "loss_iou": 0.2578125, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 254101784, + "step": 2620 + }, + { + "epoch": 0.25625733281188895, + "grad_norm": 4.918985098311164, + "learning_rate": 5e-05, + "loss": 0.1011, + "num_input_tokens_seen": 254198852, + "step": 2621 + }, + { + "epoch": 0.25625733281188895, + "loss": 0.08708013594150543, + "loss_ce": 0.004865786526352167, + "loss_iou": 0.359375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 254198852, + "step": 2621 + }, + { + "epoch": 0.2563551036370747, + "grad_norm": 7.886845401233183, + "learning_rate": 5e-05, + "loss": 0.1062, + "num_input_tokens_seen": 254295980, + "step": 2622 + }, + { + "epoch": 0.2563551036370747, + "loss": 0.1341112107038498, + "loss_ce": 0.008104127831757069, + "loss_iou": 0.267578125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 254295980, + "step": 2622 + }, + { + "epoch": 0.25645287446226045, + "grad_norm": 3.567047885444216, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 254392856, + "step": 2623 + }, + { + "epoch": 0.25645287446226045, + "loss": 0.09234487265348434, + "loss_ce": 0.00738774798810482, + "loss_iou": 0.375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 254392856, + "step": 2623 + }, + { + "epoch": 0.2565506452874462, + "grad_norm": 19.367931924107555, + "learning_rate": 5e-05, + "loss": 0.0787, + "num_input_tokens_seen": 254490296, + "step": 2624 + }, + { + "epoch": 0.2565506452874462, + "loss": 0.07858569920063019, + "loss_ce": 0.0033293531741946936, + "loss_iou": 0.39453125, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 254490296, + "step": 2624 + }, + { + "epoch": 0.256648416112632, + "grad_norm": 14.856107519139348, + "learning_rate": 5e-05, + "loss": 0.1305, + "num_input_tokens_seen": 254587580, + "step": 2625 + }, + { + "epoch": 0.256648416112632, + "loss": 0.1615869104862213, + "loss_ce": 0.0046655237674713135, + "loss_iou": 0.384765625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 254587580, + "step": 2625 + }, + { + "epoch": 0.25674618693781776, + "grad_norm": 17.298206024696004, + "learning_rate": 5e-05, + "loss": 0.1521, + "num_input_tokens_seen": 254685140, + "step": 2626 + }, + { + "epoch": 0.25674618693781776, + "loss": 0.1545390486717224, + "loss_ce": 0.004331531934440136, + "loss_iou": 0.38671875, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 254685140, + "step": 2626 + }, + { + "epoch": 0.2568439577630035, + "grad_norm": 12.469886754587069, + "learning_rate": 5e-05, + "loss": 0.077, + "num_input_tokens_seen": 254782208, + "step": 2627 + }, + { + "epoch": 0.2568439577630035, + "loss": 0.08685188740491867, + "loss_ce": 0.00652962364256382, + "loss_iou": 0.451171875, + "loss_num": 0.01611328125, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 254782208, + "step": 2627 + }, + { + "epoch": 0.2569417285881893, + "grad_norm": 11.784056021409748, + "learning_rate": 5e-05, + "loss": 0.1304, + "num_input_tokens_seen": 254879092, + "step": 2628 + }, + { + "epoch": 0.2569417285881893, + "loss": 0.1528445929288864, + "loss_ce": 0.012616313993930817, + "loss_iou": 0.306640625, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 254879092, + "step": 2628 + }, + { + "epoch": 0.25703949941337506, + "grad_norm": 19.105067764394036, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 254975688, + "step": 2629 + }, + { + "epoch": 0.25703949941337506, + "loss": 0.084825798869133, + "loss_ce": 0.006166741251945496, + "loss_iou": 0.28515625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 254975688, + "step": 2629 + }, + { + "epoch": 0.2571372702385608, + "grad_norm": 7.5377993988949425, + "learning_rate": 5e-05, + "loss": 0.0689, + "num_input_tokens_seen": 255073044, + "step": 2630 + }, + { + "epoch": 0.2571372702385608, + "loss": 0.0716162919998169, + "loss_ce": 0.0020018829964101315, + "loss_iou": 0.34765625, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 255073044, + "step": 2630 + }, + { + "epoch": 0.25723504106374656, + "grad_norm": 4.869224655796465, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 255169432, + "step": 2631 + }, + { + "epoch": 0.25723504106374656, + "loss": 0.07346859574317932, + "loss_ce": 0.00800839252769947, + "loss_iou": 0.26171875, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 255169432, + "step": 2631 + }, + { + "epoch": 0.25733281188893237, + "grad_norm": 2.9890973276890094, + "learning_rate": 5e-05, + "loss": 0.0971, + "num_input_tokens_seen": 255267364, + "step": 2632 + }, + { + "epoch": 0.25733281188893237, + "loss": 0.09712676703929901, + "loss_ce": 0.0028579644858837128, + "loss_iou": 0.404296875, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 255267364, + "step": 2632 + }, + { + "epoch": 0.2574305827141181, + "grad_norm": 7.8682041056085845, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 255364532, + "step": 2633 + }, + { + "epoch": 0.2574305827141181, + "loss": 0.12185589969158173, + "loss_ce": 0.006446055136620998, + "loss_iou": 0.302734375, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 255364532, + "step": 2633 + }, + { + "epoch": 0.25752835353930387, + "grad_norm": 18.719442643462205, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 255461628, + "step": 2634 + }, + { + "epoch": 0.25752835353930387, + "loss": 0.1574307531118393, + "loss_ce": 0.0066128866747021675, + "loss_iou": 0.31640625, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 255461628, + "step": 2634 + }, + { + "epoch": 0.2576261243644896, + "grad_norm": 13.665600472002927, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 255556976, + "step": 2635 + }, + { + "epoch": 0.2576261243644896, + "loss": 0.07061992585659027, + "loss_ce": 0.007360806223005056, + "loss_iou": 0.212890625, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 255556976, + "step": 2635 + }, + { + "epoch": 0.2577238951896754, + "grad_norm": 15.447203304634593, + "learning_rate": 5e-05, + "loss": 0.0881, + "num_input_tokens_seen": 255653780, + "step": 2636 + }, + { + "epoch": 0.2577238951896754, + "loss": 0.0748756155371666, + "loss_ce": 0.008000162430107594, + "loss_iou": 0.2109375, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 255653780, + "step": 2636 + }, + { + "epoch": 0.2578216660148612, + "grad_norm": 5.001921717920358, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 255750796, + "step": 2637 + }, + { + "epoch": 0.2578216660148612, + "loss": 0.08902457356452942, + "loss_ce": 0.0029955171048641205, + "loss_iou": 0.35546875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 255750796, + "step": 2637 + }, + { + "epoch": 0.2579194368400469, + "grad_norm": 10.08905926785391, + "learning_rate": 5e-05, + "loss": 0.0916, + "num_input_tokens_seen": 255847544, + "step": 2638 + }, + { + "epoch": 0.2579194368400469, + "loss": 0.11098609864711761, + "loss_ce": 0.0043881977908313274, + "loss_iou": 0.390625, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 255847544, + "step": 2638 + }, + { + "epoch": 0.2580172076652327, + "grad_norm": 3.208365627615248, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 255944248, + "step": 2639 + }, + { + "epoch": 0.2580172076652327, + "loss": 0.11000959575176239, + "loss_ce": 0.00917951948940754, + "loss_iou": 0.2294921875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 255944248, + "step": 2639 + }, + { + "epoch": 0.2581149784904185, + "grad_norm": 8.690210645070666, + "learning_rate": 5e-05, + "loss": 0.1092, + "num_input_tokens_seen": 256042804, + "step": 2640 + }, + { + "epoch": 0.2581149784904185, + "loss": 0.09174863994121552, + "loss_ce": 0.007291238754987717, + "loss_iou": 0.39453125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 256042804, + "step": 2640 + }, + { + "epoch": 0.25821274931560423, + "grad_norm": 3.125072724350937, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 256139280, + "step": 2641 + }, + { + "epoch": 0.25821274931560423, + "loss": 0.08694857358932495, + "loss_ce": 0.005008874461054802, + "loss_iou": 0.333984375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 256139280, + "step": 2641 + }, + { + "epoch": 0.25831052014079, + "grad_norm": 28.90968573042501, + "learning_rate": 5e-05, + "loss": 0.1266, + "num_input_tokens_seen": 256236448, + "step": 2642 + }, + { + "epoch": 0.25831052014079, + "loss": 0.13241946697235107, + "loss_ce": 0.008350243791937828, + "loss_iou": 0.2890625, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 256236448, + "step": 2642 + }, + { + "epoch": 0.25840829096597573, + "grad_norm": 13.78916367716625, + "learning_rate": 5e-05, + "loss": 0.1202, + "num_input_tokens_seen": 256333492, + "step": 2643 + }, + { + "epoch": 0.25840829096597573, + "loss": 0.12880921363830566, + "loss_ce": 0.008447883650660515, + "loss_iou": 0.271484375, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 256333492, + "step": 2643 + }, + { + "epoch": 0.25850606179116153, + "grad_norm": 3.911280000054562, + "learning_rate": 5e-05, + "loss": 0.1142, + "num_input_tokens_seen": 256430668, + "step": 2644 + }, + { + "epoch": 0.25850606179116153, + "loss": 0.1233225166797638, + "loss_ce": 0.004456552676856518, + "loss_iou": 0.357421875, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 256430668, + "step": 2644 + }, + { + "epoch": 0.2586038326163473, + "grad_norm": 13.21871321185113, + "learning_rate": 5e-05, + "loss": 0.1126, + "num_input_tokens_seen": 256528640, + "step": 2645 + }, + { + "epoch": 0.2586038326163473, + "loss": 0.11064116656780243, + "loss_ce": 0.009681394323706627, + "loss_iou": 0.296875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 256528640, + "step": 2645 + }, + { + "epoch": 0.25870160344153303, + "grad_norm": 2.7552379648657404, + "learning_rate": 5e-05, + "loss": 0.0938, + "num_input_tokens_seen": 256625452, + "step": 2646 + }, + { + "epoch": 0.25870160344153303, + "loss": 0.09782908856868744, + "loss_ce": 0.006894337013363838, + "loss_iou": 0.24609375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 256625452, + "step": 2646 + }, + { + "epoch": 0.2587993742667188, + "grad_norm": 2.9993496141800935, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 256722644, + "step": 2647 + }, + { + "epoch": 0.2587993742667188, + "loss": 0.05121837928891182, + "loss_ce": 0.002283442532643676, + "loss_iou": 0.302734375, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 256722644, + "step": 2647 + }, + { + "epoch": 0.2588971450919046, + "grad_norm": 13.826774613612402, + "learning_rate": 5e-05, + "loss": 0.105, + "num_input_tokens_seen": 256819316, + "step": 2648 + }, + { + "epoch": 0.2588971450919046, + "loss": 0.14774733781814575, + "loss_ce": 0.0061457837000489235, + "loss_iou": 0.3125, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 256819316, + "step": 2648 + }, + { + "epoch": 0.25899491591709034, + "grad_norm": 3.5437289134156886, + "learning_rate": 5e-05, + "loss": 0.1063, + "num_input_tokens_seen": 256916268, + "step": 2649 + }, + { + "epoch": 0.25899491591709034, + "loss": 0.09839766472578049, + "loss_ce": 0.004800258670002222, + "loss_iou": 0.439453125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 256916268, + "step": 2649 + }, + { + "epoch": 0.2590926867422761, + "grad_norm": 11.798327816814455, + "learning_rate": 5e-05, + "loss": 0.0889, + "num_input_tokens_seen": 257013276, + "step": 2650 + }, + { + "epoch": 0.2590926867422761, + "loss": 0.078218474984169, + "loss_ce": 0.004854216240346432, + "loss_iou": 0.326171875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 257013276, + "step": 2650 + }, + { + "epoch": 0.2591904575674619, + "grad_norm": 5.600706532206249, + "learning_rate": 5e-05, + "loss": 0.1111, + "num_input_tokens_seen": 257110520, + "step": 2651 + }, + { + "epoch": 0.2591904575674619, + "loss": 0.16101451218128204, + "loss_ce": 0.004428823944181204, + "loss_iou": 0.515625, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 257110520, + "step": 2651 + }, + { + "epoch": 0.25928822839264765, + "grad_norm": 26.36992194747416, + "learning_rate": 5e-05, + "loss": 0.1134, + "num_input_tokens_seen": 257207300, + "step": 2652 + }, + { + "epoch": 0.25928822839264765, + "loss": 0.12326764315366745, + "loss_ce": 0.0085673276335001, + "loss_iou": 0.3671875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 257207300, + "step": 2652 + }, + { + "epoch": 0.2593859992178334, + "grad_norm": 9.406160309551252, + "learning_rate": 5e-05, + "loss": 0.1146, + "num_input_tokens_seen": 257304572, + "step": 2653 + }, + { + "epoch": 0.2593859992178334, + "loss": 0.08123256266117096, + "loss_ce": 0.005243790335953236, + "loss_iou": 0.328125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 257304572, + "step": 2653 + }, + { + "epoch": 0.25948377004301915, + "grad_norm": 5.105527774904853, + "learning_rate": 5e-05, + "loss": 0.1029, + "num_input_tokens_seen": 257400992, + "step": 2654 + }, + { + "epoch": 0.25948377004301915, + "loss": 0.12118048965930939, + "loss_ce": 0.006648021750152111, + "loss_iou": 0.400390625, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 257400992, + "step": 2654 + }, + { + "epoch": 0.25958154086820495, + "grad_norm": 2.6643576819272443, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 257498304, + "step": 2655 + }, + { + "epoch": 0.25958154086820495, + "loss": 0.09127358347177505, + "loss_ce": 0.006266869604587555, + "loss_iou": 0.275390625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 257498304, + "step": 2655 + }, + { + "epoch": 0.2596793116933907, + "grad_norm": 16.00780646497479, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 257595708, + "step": 2656 + }, + { + "epoch": 0.2596793116933907, + "loss": 0.059769678860902786, + "loss_ce": 0.006730129476636648, + "loss_iou": 0.359375, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 257595708, + "step": 2656 + }, + { + "epoch": 0.25977708251857645, + "grad_norm": 3.910814258368873, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 257692248, + "step": 2657 + }, + { + "epoch": 0.25977708251857645, + "loss": 0.08199480921030045, + "loss_ce": 0.005395687650889158, + "loss_iou": 0.28125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 257692248, + "step": 2657 + }, + { + "epoch": 0.2598748533437622, + "grad_norm": 71.3705265260031, + "learning_rate": 5e-05, + "loss": 0.1187, + "num_input_tokens_seen": 257789336, + "step": 2658 + }, + { + "epoch": 0.2598748533437622, + "loss": 0.16219593584537506, + "loss_ce": 0.00544239766895771, + "loss_iou": 0.2578125, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 257789336, + "step": 2658 + }, + { + "epoch": 0.259972624168948, + "grad_norm": 11.395414556617645, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 257885772, + "step": 2659 + }, + { + "epoch": 0.259972624168948, + "loss": 0.10079167783260345, + "loss_ce": 0.007590990513563156, + "loss_iou": 0.2734375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 257885772, + "step": 2659 + }, + { + "epoch": 0.26007039499413376, + "grad_norm": 4.468208673057393, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 257982716, + "step": 2660 + }, + { + "epoch": 0.26007039499413376, + "loss": 0.044842950999736786, + "loss_ce": 0.003789182286709547, + "loss_iou": 0.33203125, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 257982716, + "step": 2660 + }, + { + "epoch": 0.2601681658193195, + "grad_norm": 16.957499988050607, + "learning_rate": 5e-05, + "loss": 0.1287, + "num_input_tokens_seen": 258079496, + "step": 2661 + }, + { + "epoch": 0.2601681658193195, + "loss": 0.14971831440925598, + "loss_ce": 0.006423034705221653, + "loss_iou": 0.345703125, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 258079496, + "step": 2661 + }, + { + "epoch": 0.26026593664450526, + "grad_norm": 5.64989844020514, + "learning_rate": 5e-05, + "loss": 0.0935, + "num_input_tokens_seen": 258175628, + "step": 2662 + }, + { + "epoch": 0.26026593664450526, + "loss": 0.09083367139101028, + "loss_ce": 0.006322862580418587, + "loss_iou": 0.279296875, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 258175628, + "step": 2662 + }, + { + "epoch": 0.26036370746969106, + "grad_norm": 5.015825517895758, + "learning_rate": 5e-05, + "loss": 0.1211, + "num_input_tokens_seen": 258272580, + "step": 2663 + }, + { + "epoch": 0.26036370746969106, + "loss": 0.08842860162258148, + "loss_ce": 0.002170662861317396, + "loss_iou": 0.3359375, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 258272580, + "step": 2663 + }, + { + "epoch": 0.2604614782948768, + "grad_norm": 9.861686756256553, + "learning_rate": 5e-05, + "loss": 0.0893, + "num_input_tokens_seen": 258369248, + "step": 2664 + }, + { + "epoch": 0.2604614782948768, + "loss": 0.07655265927314758, + "loss_ce": 0.011794356629252434, + "loss_iou": 0.1943359375, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 258369248, + "step": 2664 + }, + { + "epoch": 0.26055924912006256, + "grad_norm": 16.072470203528397, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 258466040, + "step": 2665 + }, + { + "epoch": 0.26055924912006256, + "loss": 0.08833427727222443, + "loss_ce": 0.008561329916119576, + "loss_iou": 0.40625, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 258466040, + "step": 2665 + }, + { + "epoch": 0.2606570199452483, + "grad_norm": 6.550220438037178, + "learning_rate": 5e-05, + "loss": 0.1147, + "num_input_tokens_seen": 258562364, + "step": 2666 + }, + { + "epoch": 0.2606570199452483, + "loss": 0.11299584060907364, + "loss_ce": 0.007939082570374012, + "loss_iou": 0.220703125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 258562364, + "step": 2666 + }, + { + "epoch": 0.2607547907704341, + "grad_norm": 6.332137442777131, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 258659232, + "step": 2667 + }, + { + "epoch": 0.2607547907704341, + "loss": 0.0671197697520256, + "loss_ce": 0.0075418283231556416, + "loss_iou": 0.361328125, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 258659232, + "step": 2667 + }, + { + "epoch": 0.26085256159561987, + "grad_norm": 6.411465759829137, + "learning_rate": 5e-05, + "loss": 0.1479, + "num_input_tokens_seen": 258755576, + "step": 2668 + }, + { + "epoch": 0.26085256159561987, + "loss": 0.12602627277374268, + "loss_ce": 0.009616967290639877, + "loss_iou": 0.32421875, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 258755576, + "step": 2668 + }, + { + "epoch": 0.2609503324208056, + "grad_norm": 9.938147385970725, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 258852800, + "step": 2669 + }, + { + "epoch": 0.2609503324208056, + "loss": 0.09796278178691864, + "loss_ce": 0.00914137065410614, + "loss_iou": 0.38671875, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 258852800, + "step": 2669 + }, + { + "epoch": 0.26104810324599137, + "grad_norm": 8.898128658414121, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 258948848, + "step": 2670 + }, + { + "epoch": 0.26104810324599137, + "loss": 0.08023790270090103, + "loss_ce": 0.002906365320086479, + "loss_iou": 0.298828125, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 258948848, + "step": 2670 + }, + { + "epoch": 0.2611458740711772, + "grad_norm": 7.431718831060695, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 259045540, + "step": 2671 + }, + { + "epoch": 0.2611458740711772, + "loss": 0.06722894310951233, + "loss_ce": 0.009375238791108131, + "loss_iou": 0.291015625, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 259045540, + "step": 2671 + }, + { + "epoch": 0.2612436448963629, + "grad_norm": 16.397739382117553, + "learning_rate": 5e-05, + "loss": 0.0971, + "num_input_tokens_seen": 259142632, + "step": 2672 + }, + { + "epoch": 0.2612436448963629, + "loss": 0.11071256548166275, + "loss_ce": 0.006739174481481314, + "loss_iou": 0.482421875, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 259142632, + "step": 2672 + }, + { + "epoch": 0.2613414157215487, + "grad_norm": 25.78593005098518, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 259238676, + "step": 2673 + }, + { + "epoch": 0.2613414157215487, + "loss": 0.07169494032859802, + "loss_ce": 0.007791127078235149, + "loss_iou": 0.291015625, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 259238676, + "step": 2673 + }, + { + "epoch": 0.2614391865467345, + "grad_norm": 17.883031061122193, + "learning_rate": 5e-05, + "loss": 0.1357, + "num_input_tokens_seen": 259335312, + "step": 2674 + }, + { + "epoch": 0.2614391865467345, + "loss": 0.17001983523368835, + "loss_ce": 0.009680484421551228, + "loss_iou": 0.310546875, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 259335312, + "step": 2674 + }, + { + "epoch": 0.26153695737192023, + "grad_norm": 3.9758137447300808, + "learning_rate": 5e-05, + "loss": 0.078, + "num_input_tokens_seen": 259432808, + "step": 2675 + }, + { + "epoch": 0.26153695737192023, + "loss": 0.0925728902220726, + "loss_ce": 0.0077416496351361275, + "loss_iou": 0.318359375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 259432808, + "step": 2675 + }, + { + "epoch": 0.261634728197106, + "grad_norm": 14.245192361636834, + "learning_rate": 5e-05, + "loss": 0.1418, + "num_input_tokens_seen": 259530308, + "step": 2676 + }, + { + "epoch": 0.261634728197106, + "loss": 0.11840018630027771, + "loss_ce": 0.006705852225422859, + "loss_iou": 0.31640625, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 259530308, + "step": 2676 + }, + { + "epoch": 0.26173249902229173, + "grad_norm": 17.01505326183716, + "learning_rate": 5e-05, + "loss": 0.1188, + "num_input_tokens_seen": 259628076, + "step": 2677 + }, + { + "epoch": 0.26173249902229173, + "loss": 0.13368536531925201, + "loss_ce": 0.005969302728772163, + "loss_iou": 0.34765625, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 259628076, + "step": 2677 + }, + { + "epoch": 0.26183026984747754, + "grad_norm": 4.772126568359451, + "learning_rate": 5e-05, + "loss": 0.1312, + "num_input_tokens_seen": 259724740, + "step": 2678 + }, + { + "epoch": 0.26183026984747754, + "loss": 0.12696725130081177, + "loss_ce": 0.002470782957971096, + "loss_iou": 0.41796875, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 259724740, + "step": 2678 + }, + { + "epoch": 0.2619280406726633, + "grad_norm": 2.94440929223437, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 259820784, + "step": 2679 + }, + { + "epoch": 0.2619280406726633, + "loss": 0.07656657695770264, + "loss_ce": 0.005967979319393635, + "loss_iou": 0.28125, + "loss_num": 0.01409912109375, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 259820784, + "step": 2679 + }, + { + "epoch": 0.26202581149784904, + "grad_norm": 6.177383615043738, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 259917544, + "step": 2680 + }, + { + "epoch": 0.26202581149784904, + "loss": 0.06687857955694199, + "loss_ce": 0.007796549703925848, + "loss_iou": 0.3984375, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 259917544, + "step": 2680 + }, + { + "epoch": 0.2621235823230348, + "grad_norm": 4.320879135655721, + "learning_rate": 5e-05, + "loss": 0.0991, + "num_input_tokens_seen": 260014688, + "step": 2681 + }, + { + "epoch": 0.2621235823230348, + "loss": 0.11087194085121155, + "loss_ce": 0.005120911169797182, + "loss_iou": 0.396484375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 260014688, + "step": 2681 + }, + { + "epoch": 0.2622213531482206, + "grad_norm": 12.903618069876547, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 260111668, + "step": 2682 + }, + { + "epoch": 0.2622213531482206, + "loss": 0.06504650413990021, + "loss_ce": 0.0034925518557429314, + "loss_iou": 0.3671875, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 260111668, + "step": 2682 + }, + { + "epoch": 0.26231912397340634, + "grad_norm": 3.50141247781083, + "learning_rate": 5e-05, + "loss": 0.0889, + "num_input_tokens_seen": 260209268, + "step": 2683 + }, + { + "epoch": 0.26231912397340634, + "loss": 0.09445930272340775, + "loss_ce": 0.005073314066976309, + "loss_iou": 0.373046875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 260209268, + "step": 2683 + }, + { + "epoch": 0.2624168947985921, + "grad_norm": 5.284055613447496, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 260305964, + "step": 2684 + }, + { + "epoch": 0.2624168947985921, + "loss": 0.11366300284862518, + "loss_ce": 0.004822054877877235, + "loss_iou": 0.404296875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 260305964, + "step": 2684 + }, + { + "epoch": 0.26251466562377784, + "grad_norm": 5.617283662336253, + "learning_rate": 5e-05, + "loss": 0.1154, + "num_input_tokens_seen": 260401768, + "step": 2685 + }, + { + "epoch": 0.26251466562377784, + "loss": 0.1320902705192566, + "loss_ce": 0.006952932570129633, + "loss_iou": 0.31640625, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 260401768, + "step": 2685 + }, + { + "epoch": 0.26261243644896365, + "grad_norm": 3.2280906937628995, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 260498536, + "step": 2686 + }, + { + "epoch": 0.26261243644896365, + "loss": 0.08926264941692352, + "loss_ce": 0.003996536135673523, + "loss_iou": 0.2177734375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 260498536, + "step": 2686 + }, + { + "epoch": 0.2627102072741494, + "grad_norm": 2.130763342370051, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 260595808, + "step": 2687 + }, + { + "epoch": 0.2627102072741494, + "loss": 0.07943408936262131, + "loss_ce": 0.00539844436571002, + "loss_iou": 0.341796875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 260595808, + "step": 2687 + }, + { + "epoch": 0.26280797809933515, + "grad_norm": 2.530145635992122, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 260692576, + "step": 2688 + }, + { + "epoch": 0.26280797809933515, + "loss": 0.063815176486969, + "loss_ce": 0.003222523955628276, + "loss_iou": 0.384765625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 260692576, + "step": 2688 + }, + { + "epoch": 0.2629057489245209, + "grad_norm": 4.3394210923829695, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 260789316, + "step": 2689 + }, + { + "epoch": 0.2629057489245209, + "loss": 0.09478726238012314, + "loss_ce": 0.006225253455340862, + "loss_iou": 0.22265625, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 260789316, + "step": 2689 + }, + { + "epoch": 0.2630035197497067, + "grad_norm": 7.467264395441328, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 260886080, + "step": 2690 + }, + { + "epoch": 0.2630035197497067, + "loss": 0.10442844778299332, + "loss_ce": 0.003979839384555817, + "loss_iou": 0.31640625, + "loss_num": 0.02001953125, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 260886080, + "step": 2690 + }, + { + "epoch": 0.26310129057489245, + "grad_norm": 8.76034085337642, + "learning_rate": 5e-05, + "loss": 0.1147, + "num_input_tokens_seen": 260983436, + "step": 2691 + }, + { + "epoch": 0.26310129057489245, + "loss": 0.13121014833450317, + "loss_ce": 0.008178537711501122, + "loss_iou": 0.431640625, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 260983436, + "step": 2691 + }, + { + "epoch": 0.2631990614000782, + "grad_norm": 21.67988266130293, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 261081108, + "step": 2692 + }, + { + "epoch": 0.2631990614000782, + "loss": 0.043852753937244415, + "loss_ce": 0.0036115124821662903, + "loss_iou": 0.32421875, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 261081108, + "step": 2692 + }, + { + "epoch": 0.26329683222526395, + "grad_norm": 10.28987492348227, + "learning_rate": 5e-05, + "loss": 0.073, + "num_input_tokens_seen": 261177852, + "step": 2693 + }, + { + "epoch": 0.26329683222526395, + "loss": 0.07042244076728821, + "loss_ce": 0.005887302570044994, + "loss_iou": 0.345703125, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 261177852, + "step": 2693 + }, + { + "epoch": 0.26339460305044976, + "grad_norm": 6.462848351400865, + "learning_rate": 5e-05, + "loss": 0.1179, + "num_input_tokens_seen": 261275364, + "step": 2694 + }, + { + "epoch": 0.26339460305044976, + "loss": 0.0877131000161171, + "loss_ce": 0.007375577464699745, + "loss_iou": 0.318359375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 261275364, + "step": 2694 + }, + { + "epoch": 0.2634923738756355, + "grad_norm": 30.46648155976952, + "learning_rate": 5e-05, + "loss": 0.1011, + "num_input_tokens_seen": 261371696, + "step": 2695 + }, + { + "epoch": 0.2634923738756355, + "loss": 0.10128787904977798, + "loss_ce": 0.005287209991365671, + "loss_iou": 0.2890625, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 261371696, + "step": 2695 + }, + { + "epoch": 0.26359014470082126, + "grad_norm": 50.05952892589266, + "learning_rate": 5e-05, + "loss": 0.1099, + "num_input_tokens_seen": 261467820, + "step": 2696 + }, + { + "epoch": 0.26359014470082126, + "loss": 0.10251781344413757, + "loss_ce": 0.005655020009726286, + "loss_iou": 0.31640625, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 261467820, + "step": 2696 + }, + { + "epoch": 0.26368791552600707, + "grad_norm": 17.43430106293114, + "learning_rate": 5e-05, + "loss": 0.1023, + "num_input_tokens_seen": 261564692, + "step": 2697 + }, + { + "epoch": 0.26368791552600707, + "loss": 0.10537004470825195, + "loss_ce": 0.00414323341101408, + "loss_iou": 0.33203125, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 261564692, + "step": 2697 + }, + { + "epoch": 0.2637856863511928, + "grad_norm": 11.039145751291517, + "learning_rate": 5e-05, + "loss": 0.1232, + "num_input_tokens_seen": 261660480, + "step": 2698 + }, + { + "epoch": 0.2637856863511928, + "loss": 0.08779741823673248, + "loss_ce": 0.008054984733462334, + "loss_iou": 0.310546875, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 261660480, + "step": 2698 + }, + { + "epoch": 0.26388345717637857, + "grad_norm": 10.657354623264872, + "learning_rate": 5e-05, + "loss": 0.0973, + "num_input_tokens_seen": 261756884, + "step": 2699 + }, + { + "epoch": 0.26388345717637857, + "loss": 0.11006423830986023, + "loss_ce": 0.004198758397251368, + "loss_iou": 0.2392578125, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 261756884, + "step": 2699 + }, + { + "epoch": 0.2639812280015643, + "grad_norm": 4.618938908112567, + "learning_rate": 5e-05, + "loss": 0.0856, + "num_input_tokens_seen": 261853100, + "step": 2700 + }, + { + "epoch": 0.2639812280015643, + "loss": 0.07327930629253387, + "loss_ce": 0.0068577989004552364, + "loss_iou": 0.2119140625, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 261853100, + "step": 2700 + }, + { + "epoch": 0.2640789988267501, + "grad_norm": 7.394799851468591, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 261949636, + "step": 2701 + }, + { + "epoch": 0.2640789988267501, + "loss": 0.07501775771379471, + "loss_ce": 0.0028589419089257717, + "loss_iou": 0.328125, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 261949636, + "step": 2701 + }, + { + "epoch": 0.26417676965193587, + "grad_norm": 4.837266055627927, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 262046304, + "step": 2702 + }, + { + "epoch": 0.26417676965193587, + "loss": 0.07869039475917816, + "loss_ce": 0.004288541618734598, + "loss_iou": 0.361328125, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 262046304, + "step": 2702 + }, + { + "epoch": 0.2642745404771216, + "grad_norm": 9.606365600795282, + "learning_rate": 5e-05, + "loss": 0.0691, + "num_input_tokens_seen": 262142900, + "step": 2703 + }, + { + "epoch": 0.2642745404771216, + "loss": 0.06126384809613228, + "loss_ce": 0.006683158688247204, + "loss_iou": 0.306640625, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 262142900, + "step": 2703 + }, + { + "epoch": 0.2643723113023074, + "grad_norm": 30.093808345086448, + "learning_rate": 5e-05, + "loss": 0.1088, + "num_input_tokens_seen": 262240292, + "step": 2704 + }, + { + "epoch": 0.2643723113023074, + "loss": 0.11744159460067749, + "loss_ce": 0.0057167429476976395, + "loss_iou": 0.38671875, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 262240292, + "step": 2704 + }, + { + "epoch": 0.2644700821274932, + "grad_norm": 3.894253169332776, + "learning_rate": 5e-05, + "loss": 0.1423, + "num_input_tokens_seen": 262336920, + "step": 2705 + }, + { + "epoch": 0.2644700821274932, + "loss": 0.16730663180351257, + "loss_ce": 0.011911114677786827, + "loss_iou": 0.365234375, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 262336920, + "step": 2705 + }, + { + "epoch": 0.26456785295267893, + "grad_norm": 9.530319813452383, + "learning_rate": 5e-05, + "loss": 0.0856, + "num_input_tokens_seen": 262432204, + "step": 2706 + }, + { + "epoch": 0.26456785295267893, + "loss": 0.08835460245609283, + "loss_ce": 0.012625234201550484, + "loss_iou": 0.23046875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 262432204, + "step": 2706 + }, + { + "epoch": 0.2646656237778647, + "grad_norm": 4.374987193756325, + "learning_rate": 5e-05, + "loss": 0.0867, + "num_input_tokens_seen": 262529156, + "step": 2707 + }, + { + "epoch": 0.2646656237778647, + "loss": 0.13054108619689941, + "loss_ce": 0.008241896517574787, + "loss_iou": 0.30859375, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 262529156, + "step": 2707 + }, + { + "epoch": 0.26476339460305043, + "grad_norm": 3.046875618020551, + "learning_rate": 5e-05, + "loss": 0.0935, + "num_input_tokens_seen": 262626240, + "step": 2708 + }, + { + "epoch": 0.26476339460305043, + "loss": 0.07920996099710464, + "loss_ce": 0.0030457167886197567, + "loss_iou": 0.294921875, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 262626240, + "step": 2708 + }, + { + "epoch": 0.26486116542823623, + "grad_norm": 9.94495743440289, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 262723244, + "step": 2709 + }, + { + "epoch": 0.26486116542823623, + "loss": 0.06238282471895218, + "loss_ce": 0.006413587369024754, + "loss_iou": 0.310546875, + "loss_num": 0.01123046875, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 262723244, + "step": 2709 + }, + { + "epoch": 0.264958936253422, + "grad_norm": 13.949630825066462, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 262820376, + "step": 2710 + }, + { + "epoch": 0.264958936253422, + "loss": 0.06981299817562103, + "loss_ce": 0.003345712786540389, + "loss_iou": 0.357421875, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 262820376, + "step": 2710 + }, + { + "epoch": 0.26505670707860773, + "grad_norm": 3.7175774363959277, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 262917872, + "step": 2711 + }, + { + "epoch": 0.26505670707860773, + "loss": 0.07430432736873627, + "loss_ce": 0.005319343879818916, + "loss_iou": 0.3515625, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 262917872, + "step": 2711 + }, + { + "epoch": 0.2651544779037935, + "grad_norm": 7.339868757857535, + "learning_rate": 5e-05, + "loss": 0.1323, + "num_input_tokens_seen": 263014832, + "step": 2712 + }, + { + "epoch": 0.2651544779037935, + "loss": 0.0808597058057785, + "loss_ce": 0.006305254064500332, + "loss_iou": 0.33203125, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 263014832, + "step": 2712 + }, + { + "epoch": 0.2652522487289793, + "grad_norm": 4.618756954553584, + "learning_rate": 5e-05, + "loss": 0.1046, + "num_input_tokens_seen": 263112120, + "step": 2713 + }, + { + "epoch": 0.2652522487289793, + "loss": 0.0968569666147232, + "loss_ce": 0.002923868130892515, + "loss_iou": 0.318359375, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 263112120, + "step": 2713 + }, + { + "epoch": 0.26535001955416504, + "grad_norm": 13.299382030542626, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 263208756, + "step": 2714 + }, + { + "epoch": 0.26535001955416504, + "loss": 0.09836246818304062, + "loss_ce": 0.006321453023701906, + "loss_iou": 0.34765625, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 263208756, + "step": 2714 + }, + { + "epoch": 0.2654477903793508, + "grad_norm": 14.655727969028833, + "learning_rate": 5e-05, + "loss": 0.1197, + "num_input_tokens_seen": 263305408, + "step": 2715 + }, + { + "epoch": 0.2654477903793508, + "loss": 0.08049595355987549, + "loss_ce": 0.004964948631823063, + "loss_iou": 0.30078125, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 263305408, + "step": 2715 + }, + { + "epoch": 0.26554556120453654, + "grad_norm": 19.866237692576565, + "learning_rate": 5e-05, + "loss": 0.1133, + "num_input_tokens_seen": 263402804, + "step": 2716 + }, + { + "epoch": 0.26554556120453654, + "loss": 0.1607261598110199, + "loss_ce": 0.006078329868614674, + "loss_iou": 0.36328125, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 263402804, + "step": 2716 + }, + { + "epoch": 0.26564333202972235, + "grad_norm": 6.936200513898392, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 263499056, + "step": 2717 + }, + { + "epoch": 0.26564333202972235, + "loss": 0.08530032634735107, + "loss_ce": 0.004306679591536522, + "loss_iou": 0.458984375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 263499056, + "step": 2717 + }, + { + "epoch": 0.2657411028549081, + "grad_norm": 4.1088460460149605, + "learning_rate": 5e-05, + "loss": 0.1097, + "num_input_tokens_seen": 263596520, + "step": 2718 + }, + { + "epoch": 0.2657411028549081, + "loss": 0.13064001500606537, + "loss_ce": 0.00570105854421854, + "loss_iou": 0.43359375, + "loss_num": 0.02490234375, + "loss_xval": 0.125, + "num_input_tokens_seen": 263596520, + "step": 2718 + }, + { + "epoch": 0.26583887368009385, + "grad_norm": 10.830256996031565, + "learning_rate": 5e-05, + "loss": 0.0918, + "num_input_tokens_seen": 263693528, + "step": 2719 + }, + { + "epoch": 0.26583887368009385, + "loss": 0.07117263972759247, + "loss_ce": 0.004453579895198345, + "loss_iou": 0.298828125, + "loss_num": 0.0133056640625, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 263693528, + "step": 2719 + }, + { + "epoch": 0.26593664450527965, + "grad_norm": 22.05606742774027, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 263789588, + "step": 2720 + }, + { + "epoch": 0.26593664450527965, + "loss": 0.08336633443832397, + "loss_ce": 0.007499640341848135, + "loss_iou": 0.31640625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 263789588, + "step": 2720 + }, + { + "epoch": 0.2660344153304654, + "grad_norm": 13.76627126322831, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 263886240, + "step": 2721 + }, + { + "epoch": 0.2660344153304654, + "loss": 0.082900270819664, + "loss_ce": 0.0052330382168293, + "loss_iou": 0.34765625, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 263886240, + "step": 2721 + }, + { + "epoch": 0.26613218615565115, + "grad_norm": 9.134780871443548, + "learning_rate": 5e-05, + "loss": 0.1092, + "num_input_tokens_seen": 263983316, + "step": 2722 + }, + { + "epoch": 0.26613218615565115, + "loss": 0.08572793006896973, + "loss_ce": 0.0036814268678426743, + "loss_iou": 0.3984375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 263983316, + "step": 2722 + }, + { + "epoch": 0.2662299569808369, + "grad_norm": 4.3238487610087875, + "learning_rate": 5e-05, + "loss": 0.096, + "num_input_tokens_seen": 264080792, + "step": 2723 + }, + { + "epoch": 0.2662299569808369, + "loss": 0.10039526224136353, + "loss_ce": 0.00357824400998652, + "loss_iou": 0.41796875, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 264080792, + "step": 2723 + }, + { + "epoch": 0.2663277278060227, + "grad_norm": 69.27765707562419, + "learning_rate": 5e-05, + "loss": 0.1116, + "num_input_tokens_seen": 264176912, + "step": 2724 + }, + { + "epoch": 0.2663277278060227, + "loss": 0.12349755316972733, + "loss_ce": 0.0037923508789390326, + "loss_iou": 0.30859375, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 264176912, + "step": 2724 + }, + { + "epoch": 0.26642549863120846, + "grad_norm": 10.472936475243916, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 264273820, + "step": 2725 + }, + { + "epoch": 0.26642549863120846, + "loss": 0.06648185849189758, + "loss_ce": 0.008559498004615307, + "loss_iou": 0.357421875, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 264273820, + "step": 2725 + }, + { + "epoch": 0.2665232694563942, + "grad_norm": 7.693443837928322, + "learning_rate": 5e-05, + "loss": 0.1168, + "num_input_tokens_seen": 264370668, + "step": 2726 + }, + { + "epoch": 0.2665232694563942, + "loss": 0.12280598282814026, + "loss_ce": 0.010791206732392311, + "loss_iou": 0.302734375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 264370668, + "step": 2726 + }, + { + "epoch": 0.26662104028157996, + "grad_norm": 9.795370849477443, + "learning_rate": 5e-05, + "loss": 0.1002, + "num_input_tokens_seen": 264467216, + "step": 2727 + }, + { + "epoch": 0.26662104028157996, + "loss": 0.07869742810726166, + "loss_ce": 0.0027696939650923014, + "loss_iou": 0.36328125, + "loss_num": 0.01513671875, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 264467216, + "step": 2727 + }, + { + "epoch": 0.26671881110676576, + "grad_norm": 6.33046478666018, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 264564360, + "step": 2728 + }, + { + "epoch": 0.26671881110676576, + "loss": 0.0654779002070427, + "loss_ce": 0.013582758605480194, + "loss_iou": 0.28125, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 264564360, + "step": 2728 + }, + { + "epoch": 0.2668165819319515, + "grad_norm": 3.2764334629803233, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 264661696, + "step": 2729 + }, + { + "epoch": 0.2668165819319515, + "loss": 0.06511892378330231, + "loss_ce": 0.00225271238014102, + "loss_iou": 0.33203125, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 264661696, + "step": 2729 + }, + { + "epoch": 0.26691435275713726, + "grad_norm": 8.663104460667782, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 264757660, + "step": 2730 + }, + { + "epoch": 0.26691435275713726, + "loss": 0.0642634853720665, + "loss_ce": 0.004746578633785248, + "loss_iou": 0.197265625, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 264757660, + "step": 2730 + }, + { + "epoch": 0.267012123582323, + "grad_norm": 6.453272132126666, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 264855236, + "step": 2731 + }, + { + "epoch": 0.267012123582323, + "loss": 0.07285032421350479, + "loss_ce": 0.006520368158817291, + "loss_iou": 0.3359375, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 264855236, + "step": 2731 + }, + { + "epoch": 0.2671098944075088, + "grad_norm": 5.487328721270519, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 264952392, + "step": 2732 + }, + { + "epoch": 0.2671098944075088, + "loss": 0.04420265182852745, + "loss_ce": 0.004907454364001751, + "loss_iou": 0.34765625, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 264952392, + "step": 2732 + }, + { + "epoch": 0.26720766523269457, + "grad_norm": 10.006843917029276, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 265049480, + "step": 2733 + }, + { + "epoch": 0.26720766523269457, + "loss": 0.07882164418697357, + "loss_ce": 0.003427965799346566, + "loss_iou": 0.294921875, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 265049480, + "step": 2733 + }, + { + "epoch": 0.2673054360578803, + "grad_norm": 12.26502909157287, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 265146888, + "step": 2734 + }, + { + "epoch": 0.2673054360578803, + "loss": 0.0842096135020256, + "loss_ce": 0.007717300672084093, + "loss_iou": 0.26171875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 265146888, + "step": 2734 + }, + { + "epoch": 0.26740320688306607, + "grad_norm": 19.450561280926106, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 265244292, + "step": 2735 + }, + { + "epoch": 0.26740320688306607, + "loss": 0.06424015015363693, + "loss_ce": 0.006012612022459507, + "loss_iou": 0.1884765625, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 265244292, + "step": 2735 + }, + { + "epoch": 0.2675009777082519, + "grad_norm": 16.217328834808704, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 265341332, + "step": 2736 + }, + { + "epoch": 0.2675009777082519, + "loss": 0.1075192466378212, + "loss_ce": 0.009235483594238758, + "loss_iou": 0.29296875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 265341332, + "step": 2736 + }, + { + "epoch": 0.2675987485334376, + "grad_norm": 25.253570566993343, + "learning_rate": 5e-05, + "loss": 0.0947, + "num_input_tokens_seen": 265437628, + "step": 2737 + }, + { + "epoch": 0.2675987485334376, + "loss": 0.08703946322202682, + "loss_ce": 0.006190769374370575, + "loss_iou": 0.34375, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 265437628, + "step": 2737 + }, + { + "epoch": 0.2676965193586234, + "grad_norm": 25.57823383008019, + "learning_rate": 5e-05, + "loss": 0.0927, + "num_input_tokens_seen": 265535628, + "step": 2738 + }, + { + "epoch": 0.2676965193586234, + "loss": 0.07137501984834671, + "loss_ce": 0.0034276358783245087, + "loss_iou": 0.357421875, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 265535628, + "step": 2738 + }, + { + "epoch": 0.2677942901838091, + "grad_norm": 7.703147302055973, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 265632284, + "step": 2739 + }, + { + "epoch": 0.2677942901838091, + "loss": 0.08443354070186615, + "loss_ce": 0.0031652275938540697, + "loss_iou": 0.392578125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 265632284, + "step": 2739 + }, + { + "epoch": 0.26789206100899493, + "grad_norm": 4.892655689316549, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 265729292, + "step": 2740 + }, + { + "epoch": 0.26789206100899493, + "loss": 0.08698364347219467, + "loss_ce": 0.0027742013335227966, + "loss_iou": 0.3046875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 265729292, + "step": 2740 + }, + { + "epoch": 0.2679898318341807, + "grad_norm": 3.870331762505342, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 265826104, + "step": 2741 + }, + { + "epoch": 0.2679898318341807, + "loss": 0.0933440700173378, + "loss_ce": 0.008016924373805523, + "loss_iou": 0.345703125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 265826104, + "step": 2741 + }, + { + "epoch": 0.26808760265936643, + "grad_norm": 3.1422756087577626, + "learning_rate": 5e-05, + "loss": 0.0986, + "num_input_tokens_seen": 265923416, + "step": 2742 + }, + { + "epoch": 0.26808760265936643, + "loss": 0.08873666822910309, + "loss_ce": 0.0067664459347724915, + "loss_iou": 0.30859375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 265923416, + "step": 2742 + }, + { + "epoch": 0.26818537348455224, + "grad_norm": 10.000930467075522, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 266019796, + "step": 2743 + }, + { + "epoch": 0.26818537348455224, + "loss": 0.0701533779501915, + "loss_ce": 0.0006801107083447278, + "loss_iou": 0.3828125, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 266019796, + "step": 2743 + }, + { + "epoch": 0.268283144309738, + "grad_norm": 5.323608735507126, + "learning_rate": 5e-05, + "loss": 0.128, + "num_input_tokens_seen": 266116376, + "step": 2744 + }, + { + "epoch": 0.268283144309738, + "loss": 0.12552617490291595, + "loss_ce": 0.0020520451944321394, + "loss_iou": 0.412109375, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 266116376, + "step": 2744 + }, + { + "epoch": 0.26838091513492374, + "grad_norm": 7.462592702809016, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 266213168, + "step": 2745 + }, + { + "epoch": 0.26838091513492374, + "loss": 0.10194602608680725, + "loss_ce": 0.004686507862061262, + "loss_iou": 0.41015625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 266213168, + "step": 2745 + }, + { + "epoch": 0.2684786859601095, + "grad_norm": 9.573028560160177, + "learning_rate": 5e-05, + "loss": 0.0872, + "num_input_tokens_seen": 266310044, + "step": 2746 + }, + { + "epoch": 0.2684786859601095, + "loss": 0.08197098225355148, + "loss_ce": 0.0057685887441039085, + "loss_iou": 0.361328125, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 266310044, + "step": 2746 + }, + { + "epoch": 0.2685764567852953, + "grad_norm": 5.3146936141783385, + "learning_rate": 5e-05, + "loss": 0.1214, + "num_input_tokens_seen": 266407288, + "step": 2747 + }, + { + "epoch": 0.2685764567852953, + "loss": 0.15073741972446442, + "loss_ce": 0.004420890472829342, + "loss_iou": 0.373046875, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 266407288, + "step": 2747 + }, + { + "epoch": 0.26867422761048104, + "grad_norm": 10.150426973779254, + "learning_rate": 5e-05, + "loss": 0.0834, + "num_input_tokens_seen": 266504992, + "step": 2748 + }, + { + "epoch": 0.26867422761048104, + "loss": 0.08910179138183594, + "loss_ce": 0.0073299407958984375, + "loss_iou": 0.328125, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 266504992, + "step": 2748 + }, + { + "epoch": 0.2687719984356668, + "grad_norm": 12.085558376122808, + "learning_rate": 5e-05, + "loss": 0.0772, + "num_input_tokens_seen": 266601436, + "step": 2749 + }, + { + "epoch": 0.2687719984356668, + "loss": 0.08911606669425964, + "loss_ce": 0.0017594986129552126, + "loss_iou": 0.32421875, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 266601436, + "step": 2749 + }, + { + "epoch": 0.26886976926085254, + "grad_norm": 11.96441964630578, + "learning_rate": 5e-05, + "loss": 0.1124, + "num_input_tokens_seen": 266698564, + "step": 2750 + }, + { + "epoch": 0.26886976926085254, + "eval_seeclick_CIoU": 0.4522217810153961, + "eval_seeclick_GIoU": 0.46431398391723633, + "eval_seeclick_IoU": 0.5054052919149399, + "eval_seeclick_MAE_all": 0.08591077104210854, + "eval_seeclick_MAE_h": 0.05011819116771221, + "eval_seeclick_MAE_w": 0.12015698477625847, + "eval_seeclick_MAE_x": 0.1270672008395195, + "eval_seeclick_MAE_y": 0.04630070924758911, + "eval_seeclick_NUM_probability": 0.9999886155128479, + "eval_seeclick_inside_bbox": 0.7997159063816071, + "eval_seeclick_loss": 0.296013742685318, + "eval_seeclick_loss_ce": 0.009902410674840212, + "eval_seeclick_loss_iou": 0.4208984375, + "eval_seeclick_loss_num": 0.0580902099609375, + "eval_seeclick_loss_xval": 0.29046630859375, + "eval_seeclick_runtime": 73.1436, + "eval_seeclick_samples_per_second": 0.588, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 266698564, + "step": 2750 + }, + { + "epoch": 0.26886976926085254, + "eval_icons_CIoU": 0.6213391125202179, + "eval_icons_GIoU": 0.6204680800437927, + "eval_icons_IoU": 0.6537266671657562, + "eval_icons_MAE_all": 0.06795046851038933, + "eval_icons_MAE_h": 0.08277138508856297, + "eval_icons_MAE_w": 0.05795206129550934, + "eval_icons_MAE_x": 0.05844835191965103, + "eval_icons_MAE_y": 0.07263006083667278, + "eval_icons_NUM_probability": 0.9999874234199524, + "eval_icons_inside_bbox": 0.7673611044883728, + "eval_icons_loss": 0.20399276912212372, + "eval_icons_loss_ce": 7.357323056567111e-06, + "eval_icons_loss_iou": 0.3995361328125, + "eval_icons_loss_num": 0.043209075927734375, + "eval_icons_loss_xval": 0.216094970703125, + "eval_icons_runtime": 84.705, + "eval_icons_samples_per_second": 0.59, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 266698564, + "step": 2750 + }, + { + "epoch": 0.26886976926085254, + "eval_screenspot_CIoU": 0.20759183913469315, + "eval_screenspot_GIoU": 0.18798350542783737, + "eval_screenspot_IoU": 0.3214803785085678, + "eval_screenspot_MAE_all": 0.18071023126443228, + "eval_screenspot_MAE_h": 0.1254156306385994, + "eval_screenspot_MAE_w": 0.2585581839084625, + "eval_screenspot_MAE_x": 0.21830086906750998, + "eval_screenspot_MAE_y": 0.12056626131137212, + "eval_screenspot_NUM_probability": 0.9999914566675822, + "eval_screenspot_inside_bbox": 0.546666661898295, + "eval_screenspot_loss": 0.6328017711639404, + "eval_screenspot_loss_ce": 0.017821038141846657, + "eval_screenspot_loss_iou": 0.3050537109375, + "eval_screenspot_loss_num": 0.124359130859375, + "eval_screenspot_loss_xval": 0.6215413411458334, + "eval_screenspot_runtime": 145.5613, + "eval_screenspot_samples_per_second": 0.611, + "eval_screenspot_steps_per_second": 0.021, + "num_input_tokens_seen": 266698564, + "step": 2750 + }, + { + "epoch": 0.26886976926085254, + "eval_compot_CIoU": 0.4624600112438202, + "eval_compot_GIoU": 0.4561523497104645, + "eval_compot_IoU": 0.528873473405838, + "eval_compot_MAE_all": 0.08561315014958382, + "eval_compot_MAE_h": 0.06682112440466881, + "eval_compot_MAE_w": 0.10461315885186195, + "eval_compot_MAE_x": 0.10034246742725372, + "eval_compot_MAE_y": 0.07067583501338959, + "eval_compot_NUM_probability": 0.9999843239784241, + "eval_compot_inside_bbox": 0.7204861044883728, + "eval_compot_loss": 0.2712126672267914, + "eval_compot_loss_ce": 0.01388983754441142, + "eval_compot_loss_iou": 0.494384765625, + "eval_compot_loss_num": 0.045993804931640625, + "eval_compot_loss_xval": 0.2301025390625, + "eval_compot_runtime": 92.5955, + "eval_compot_samples_per_second": 0.54, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 266698564, + "step": 2750 + }, + { + "epoch": 0.26886976926085254, + "loss": 0.23613250255584717, + "loss_ce": 0.0146969398483634, + "loss_iou": 0.53515625, + "loss_num": 0.044189453125, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 266698564, + "step": 2750 + }, + { + "epoch": 0.26896754008603835, + "grad_norm": 13.995928817734555, + "learning_rate": 5e-05, + "loss": 0.0772, + "num_input_tokens_seen": 266796216, + "step": 2751 + }, + { + "epoch": 0.26896754008603835, + "loss": 0.04510434716939926, + "loss_ce": 0.0034631132148206234, + "loss_iou": 0.2890625, + "loss_num": 0.00830078125, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 266796216, + "step": 2751 + }, + { + "epoch": 0.2690653109112241, + "grad_norm": 3.321657147842021, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 266893108, + "step": 2752 + }, + { + "epoch": 0.2690653109112241, + "loss": 0.0899946391582489, + "loss_ce": 0.007246230263262987, + "loss_iou": 0.326171875, + "loss_num": 0.0166015625, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 266893108, + "step": 2752 + }, + { + "epoch": 0.26916308173640985, + "grad_norm": 15.191277773700605, + "learning_rate": 5e-05, + "loss": 0.1063, + "num_input_tokens_seen": 266990088, + "step": 2753 + }, + { + "epoch": 0.26916308173640985, + "loss": 0.1076417863368988, + "loss_ce": 0.006186101585626602, + "loss_iou": 0.296875, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 266990088, + "step": 2753 + }, + { + "epoch": 0.2692608525615956, + "grad_norm": 21.77100250457113, + "learning_rate": 5e-05, + "loss": 0.1174, + "num_input_tokens_seen": 267087436, + "step": 2754 + }, + { + "epoch": 0.2692608525615956, + "loss": 0.1250688135623932, + "loss_ce": 0.003608845639973879, + "loss_iou": 0.41796875, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 267087436, + "step": 2754 + }, + { + "epoch": 0.2693586233867814, + "grad_norm": 10.18320797620853, + "learning_rate": 5e-05, + "loss": 0.1014, + "num_input_tokens_seen": 267184608, + "step": 2755 + }, + { + "epoch": 0.2693586233867814, + "loss": 0.11058121919631958, + "loss_ce": 0.007752242498099804, + "loss_iou": 0.322265625, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 267184608, + "step": 2755 + }, + { + "epoch": 0.26945639421196715, + "grad_norm": 22.402092412010045, + "learning_rate": 5e-05, + "loss": 0.1503, + "num_input_tokens_seen": 267281768, + "step": 2756 + }, + { + "epoch": 0.26945639421196715, + "loss": 0.1651855707168579, + "loss_ce": 0.0070740049704909325, + "loss_iou": 0.408203125, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 267281768, + "step": 2756 + }, + { + "epoch": 0.2695541650371529, + "grad_norm": 5.427403369378874, + "learning_rate": 5e-05, + "loss": 0.1037, + "num_input_tokens_seen": 267378936, + "step": 2757 + }, + { + "epoch": 0.2695541650371529, + "loss": 0.11290295422077179, + "loss_ce": 0.006762820761650801, + "loss_iou": 0.337890625, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 267378936, + "step": 2757 + }, + { + "epoch": 0.26965193586233865, + "grad_norm": 38.94461102263954, + "learning_rate": 5e-05, + "loss": 0.1092, + "num_input_tokens_seen": 267476556, + "step": 2758 + }, + { + "epoch": 0.26965193586233865, + "loss": 0.12742719054222107, + "loss_ce": 0.005265326239168644, + "loss_iou": 0.34765625, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 267476556, + "step": 2758 + }, + { + "epoch": 0.26974970668752446, + "grad_norm": 8.141773673657813, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 267573240, + "step": 2759 + }, + { + "epoch": 0.26974970668752446, + "loss": 0.09803138673305511, + "loss_ce": 0.006692275870591402, + "loss_iou": 0.375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 267573240, + "step": 2759 + }, + { + "epoch": 0.2698474775127102, + "grad_norm": 50.261321383805914, + "learning_rate": 5e-05, + "loss": 0.1334, + "num_input_tokens_seen": 267669264, + "step": 2760 + }, + { + "epoch": 0.2698474775127102, + "loss": 0.10990646481513977, + "loss_ce": 0.003258967772126198, + "loss_iou": 0.2255859375, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 267669264, + "step": 2760 + }, + { + "epoch": 0.26994524833789596, + "grad_norm": 20.711063802195397, + "learning_rate": 5e-05, + "loss": 0.1023, + "num_input_tokens_seen": 267767068, + "step": 2761 + }, + { + "epoch": 0.26994524833789596, + "loss": 0.10299495607614517, + "loss_ce": 0.009717978537082672, + "loss_iou": 0.3515625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 267767068, + "step": 2761 + }, + { + "epoch": 0.2700430191630817, + "grad_norm": 4.141701279081231, + "learning_rate": 5e-05, + "loss": 0.0598, + "num_input_tokens_seen": 267863892, + "step": 2762 + }, + { + "epoch": 0.2700430191630817, + "loss": 0.06717251241207123, + "loss_ce": 0.0040316395461559296, + "loss_iou": 0.333984375, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 267863892, + "step": 2762 + }, + { + "epoch": 0.2701407899882675, + "grad_norm": 4.893140615160566, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 267960452, + "step": 2763 + }, + { + "epoch": 0.2701407899882675, + "loss": 0.09745761752128601, + "loss_ce": 0.006225316785275936, + "loss_iou": 0.287109375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 267960452, + "step": 2763 + }, + { + "epoch": 0.27023856081345327, + "grad_norm": 1.77337989123038, + "learning_rate": 5e-05, + "loss": 0.0881, + "num_input_tokens_seen": 268057104, + "step": 2764 + }, + { + "epoch": 0.27023856081345327, + "loss": 0.0993776023387909, + "loss_ce": 0.007565474137663841, + "loss_iou": 0.2294921875, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 268057104, + "step": 2764 + }, + { + "epoch": 0.270336331638639, + "grad_norm": 4.308928173488821, + "learning_rate": 5e-05, + "loss": 0.1195, + "num_input_tokens_seen": 268153860, + "step": 2765 + }, + { + "epoch": 0.270336331638639, + "loss": 0.15814581513404846, + "loss_ce": 0.0026740061584860086, + "loss_iou": 0.1982421875, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 268153860, + "step": 2765 + }, + { + "epoch": 0.2704341024638248, + "grad_norm": 11.040981217544228, + "learning_rate": 5e-05, + "loss": 0.1061, + "num_input_tokens_seen": 268251724, + "step": 2766 + }, + { + "epoch": 0.2704341024638248, + "loss": 0.10380300134420395, + "loss_ce": 0.005342081189155579, + "loss_iou": 0.263671875, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 268251724, + "step": 2766 + }, + { + "epoch": 0.27053187328901057, + "grad_norm": 8.947738875602234, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 268348372, + "step": 2767 + }, + { + "epoch": 0.27053187328901057, + "loss": 0.059372931718826294, + "loss_ce": 0.005600958596915007, + "loss_iou": 0.337890625, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 268348372, + "step": 2767 + }, + { + "epoch": 0.2706296441141963, + "grad_norm": 12.106800649396103, + "learning_rate": 5e-05, + "loss": 0.0938, + "num_input_tokens_seen": 268446664, + "step": 2768 + }, + { + "epoch": 0.2706296441141963, + "loss": 0.10232571512460709, + "loss_ce": 0.008255278691649437, + "loss_iou": 0.326171875, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 268446664, + "step": 2768 + }, + { + "epoch": 0.27072741493938207, + "grad_norm": 5.395727099087201, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 268543508, + "step": 2769 + }, + { + "epoch": 0.27072741493938207, + "loss": 0.08824887126684189, + "loss_ce": 0.0036999264266341925, + "loss_iou": 0.224609375, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 268543508, + "step": 2769 + }, + { + "epoch": 0.2708251857645679, + "grad_norm": 3.0330583452293234, + "learning_rate": 5e-05, + "loss": 0.1031, + "num_input_tokens_seen": 268639776, + "step": 2770 + }, + { + "epoch": 0.2708251857645679, + "loss": 0.11265412718057632, + "loss_ce": 0.008131427690386772, + "loss_iou": 0.2890625, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 268639776, + "step": 2770 + }, + { + "epoch": 0.2709229565897536, + "grad_norm": 11.597795122004943, + "learning_rate": 5e-05, + "loss": 0.0651, + "num_input_tokens_seen": 268736532, + "step": 2771 + }, + { + "epoch": 0.2709229565897536, + "loss": 0.0771930068731308, + "loss_ce": 0.002970446366816759, + "loss_iou": 0.279296875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 268736532, + "step": 2771 + }, + { + "epoch": 0.2710207274149394, + "grad_norm": 18.79663308624466, + "learning_rate": 5e-05, + "loss": 0.1178, + "num_input_tokens_seen": 268834336, + "step": 2772 + }, + { + "epoch": 0.2710207274149394, + "loss": 0.1160636693239212, + "loss_ce": 0.005986760836094618, + "loss_iou": 0.41796875, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 268834336, + "step": 2772 + }, + { + "epoch": 0.2711184982401251, + "grad_norm": 33.77396234309606, + "learning_rate": 5e-05, + "loss": 0.1071, + "num_input_tokens_seen": 268930836, + "step": 2773 + }, + { + "epoch": 0.2711184982401251, + "loss": 0.09437863528728485, + "loss_ce": 0.006133238784968853, + "loss_iou": 0.35546875, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 268930836, + "step": 2773 + }, + { + "epoch": 0.27121626906531093, + "grad_norm": 14.177465502835581, + "learning_rate": 5e-05, + "loss": 0.1063, + "num_input_tokens_seen": 269028316, + "step": 2774 + }, + { + "epoch": 0.27121626906531093, + "loss": 0.09440858662128448, + "loss_ce": 0.0049310531467199326, + "loss_iou": 0.333984375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 269028316, + "step": 2774 + }, + { + "epoch": 0.2713140398904967, + "grad_norm": 7.587279879346152, + "learning_rate": 5e-05, + "loss": 0.0937, + "num_input_tokens_seen": 269125444, + "step": 2775 + }, + { + "epoch": 0.2713140398904967, + "loss": 0.07847750186920166, + "loss_ce": 0.0034652925096452236, + "loss_iou": 0.4140625, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 269125444, + "step": 2775 + }, + { + "epoch": 0.27141181071568243, + "grad_norm": 26.443003048611395, + "learning_rate": 5e-05, + "loss": 0.1191, + "num_input_tokens_seen": 269222540, + "step": 2776 + }, + { + "epoch": 0.27141181071568243, + "loss": 0.1277838796377182, + "loss_ce": 0.0035163024440407753, + "loss_iou": 0.3828125, + "loss_num": 0.02490234375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 269222540, + "step": 2776 + }, + { + "epoch": 0.2715095815408682, + "grad_norm": 47.98452249225364, + "learning_rate": 5e-05, + "loss": 0.126, + "num_input_tokens_seen": 269319216, + "step": 2777 + }, + { + "epoch": 0.2715095815408682, + "loss": 0.1074623167514801, + "loss_ce": 0.00541153596714139, + "loss_iou": 0.32421875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 269319216, + "step": 2777 + }, + { + "epoch": 0.271607352366054, + "grad_norm": 13.359707085779855, + "learning_rate": 5e-05, + "loss": 0.1122, + "num_input_tokens_seen": 269416088, + "step": 2778 + }, + { + "epoch": 0.271607352366054, + "loss": 0.11632082611322403, + "loss_ce": 0.006060821004211903, + "loss_iou": 0.33984375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 269416088, + "step": 2778 + }, + { + "epoch": 0.27170512319123974, + "grad_norm": 3.220862290845736, + "learning_rate": 5e-05, + "loss": 0.1009, + "num_input_tokens_seen": 269513084, + "step": 2779 + }, + { + "epoch": 0.27170512319123974, + "loss": 0.10626277327537537, + "loss_ce": 0.0031133615411818027, + "loss_iou": 0.3671875, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 269513084, + "step": 2779 + }, + { + "epoch": 0.2718028940164255, + "grad_norm": 3.410660455693471, + "learning_rate": 5e-05, + "loss": 0.1033, + "num_input_tokens_seen": 269609640, + "step": 2780 + }, + { + "epoch": 0.2718028940164255, + "loss": 0.1197698786854744, + "loss_ce": 0.003986186347901821, + "loss_iou": 0.357421875, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 269609640, + "step": 2780 + }, + { + "epoch": 0.27190066484161124, + "grad_norm": 13.134818293040048, + "learning_rate": 5e-05, + "loss": 0.1535, + "num_input_tokens_seen": 269706244, + "step": 2781 + }, + { + "epoch": 0.27190066484161124, + "loss": 0.16816502809524536, + "loss_ce": 0.04978734999895096, + "loss_iou": 0.412109375, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 269706244, + "step": 2781 + }, + { + "epoch": 0.27199843566679704, + "grad_norm": 29.92309592606207, + "learning_rate": 5e-05, + "loss": 0.3089, + "num_input_tokens_seen": 269803080, + "step": 2782 + }, + { + "epoch": 0.27199843566679704, + "loss": 0.2942725419998169, + "loss_ce": 0.2154609113931656, + "loss_iou": 0.34765625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 269803080, + "step": 2782 + }, + { + "epoch": 0.2720962064919828, + "grad_norm": 419.98502712352996, + "learning_rate": 5e-05, + "loss": 0.5867, + "num_input_tokens_seen": 269899480, + "step": 2783 + }, + { + "epoch": 0.2720962064919828, + "loss": 0.5458650588989258, + "loss_ce": 0.40816977620124817, + "loss_iou": 0.40625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 269899480, + "step": 2783 + }, + { + "epoch": 0.27219397731716855, + "grad_norm": 91.1179287330551, + "learning_rate": 5e-05, + "loss": 0.2543, + "num_input_tokens_seen": 269995712, + "step": 2784 + }, + { + "epoch": 0.27219397731716855, + "loss": 0.20920395851135254, + "loss_ce": 0.02402329072356224, + "loss_iou": 0.423828125, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 269995712, + "step": 2784 + }, + { + "epoch": 0.2722917481423543, + "grad_norm": 52.74929149718549, + "learning_rate": 5e-05, + "loss": 2.1986, + "num_input_tokens_seen": 270093012, + "step": 2785 + }, + { + "epoch": 0.2722917481423543, + "loss": 2.1678080558776855, + "loss_ce": 2.0247416496276855, + "loss_iou": 0.478515625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 270093012, + "step": 2785 + }, + { + "epoch": 0.2723895189675401, + "grad_norm": 55.04223003715528, + "learning_rate": 5e-05, + "loss": 0.1667, + "num_input_tokens_seen": 270189560, + "step": 2786 + }, + { + "epoch": 0.2723895189675401, + "loss": 0.13735514879226685, + "loss_ce": 0.00857097003608942, + "loss_iou": 0.361328125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 270189560, + "step": 2786 + }, + { + "epoch": 0.27248728979272585, + "grad_norm": 12.000211915323275, + "learning_rate": 5e-05, + "loss": 0.1657, + "num_input_tokens_seen": 270286536, + "step": 2787 + }, + { + "epoch": 0.27248728979272585, + "loss": 0.15043367445468903, + "loss_ce": 0.022717613726854324, + "loss_iou": 0.34765625, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 270286536, + "step": 2787 + }, + { + "epoch": 0.2725850606179116, + "grad_norm": 29.84064817624259, + "learning_rate": 5e-05, + "loss": 0.1287, + "num_input_tokens_seen": 270384720, + "step": 2788 + }, + { + "epoch": 0.2725850606179116, + "loss": 0.13167834281921387, + "loss_ce": 0.003748659510165453, + "loss_iou": 0.35546875, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 270384720, + "step": 2788 + }, + { + "epoch": 0.2726828314430974, + "grad_norm": 9.314092214562104, + "learning_rate": 5e-05, + "loss": 0.1276, + "num_input_tokens_seen": 270482608, + "step": 2789 + }, + { + "epoch": 0.2726828314430974, + "loss": 0.14131107926368713, + "loss_ce": 0.015151415951550007, + "loss_iou": 0.3828125, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 270482608, + "step": 2789 + }, + { + "epoch": 0.27278060226828316, + "grad_norm": 9.742238760842335, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 270579812, + "step": 2790 + }, + { + "epoch": 0.27278060226828316, + "loss": 0.07642621546983719, + "loss_ce": 0.003176403231918812, + "loss_iou": 0.2578125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 270579812, + "step": 2790 + }, + { + "epoch": 0.2728783730934689, + "grad_norm": 14.893950700504114, + "learning_rate": 5e-05, + "loss": 0.0951, + "num_input_tokens_seen": 270677484, + "step": 2791 + }, + { + "epoch": 0.2728783730934689, + "loss": 0.10585204511880875, + "loss_ce": 0.0035876454785466194, + "loss_iou": 0.388671875, + "loss_num": 0.0205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 270677484, + "step": 2791 + }, + { + "epoch": 0.27297614391865466, + "grad_norm": 2.757663166896508, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 270774488, + "step": 2792 + }, + { + "epoch": 0.27297614391865466, + "loss": 0.04923035204410553, + "loss_ce": 0.003423466579988599, + "loss_iou": 0.359375, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 270774488, + "step": 2792 + }, + { + "epoch": 0.27307391474384046, + "grad_norm": 9.677598896847794, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 270871216, + "step": 2793 + }, + { + "epoch": 0.27307391474384046, + "loss": 0.09585042297840118, + "loss_ce": 0.002817586064338684, + "loss_iou": 0.37109375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 270871216, + "step": 2793 + }, + { + "epoch": 0.2731716855690262, + "grad_norm": 39.13184234247153, + "learning_rate": 5e-05, + "loss": 0.1166, + "num_input_tokens_seen": 270969360, + "step": 2794 + }, + { + "epoch": 0.2731716855690262, + "loss": 0.14677393436431885, + "loss_ce": 0.009658455848693848, + "loss_iou": 0.28125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 270969360, + "step": 2794 + }, + { + "epoch": 0.27326945639421196, + "grad_norm": 10.453920315597951, + "learning_rate": 5e-05, + "loss": 0.0845, + "num_input_tokens_seen": 271066652, + "step": 2795 + }, + { + "epoch": 0.27326945639421196, + "loss": 0.06374919414520264, + "loss_ce": 0.0066660623997449875, + "loss_iou": 0.359375, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 271066652, + "step": 2795 + }, + { + "epoch": 0.2733672272193977, + "grad_norm": 7.642707377314795, + "learning_rate": 5e-05, + "loss": 0.1194, + "num_input_tokens_seen": 271164860, + "step": 2796 + }, + { + "epoch": 0.2733672272193977, + "loss": 0.134183868765831, + "loss_ce": 0.00825307797640562, + "loss_iou": 0.39453125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 271164860, + "step": 2796 + }, + { + "epoch": 0.2734649980445835, + "grad_norm": 10.871564156309311, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 271262300, + "step": 2797 + }, + { + "epoch": 0.2734649980445835, + "loss": 0.051905907690525055, + "loss_ce": 0.005946434568613768, + "loss_iou": 0.2578125, + "loss_num": 0.00921630859375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 271262300, + "step": 2797 + }, + { + "epoch": 0.27356276886976927, + "grad_norm": 15.650858560004673, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 271359224, + "step": 2798 + }, + { + "epoch": 0.27356276886976927, + "loss": 0.08525985479354858, + "loss_ce": 0.007668913342058659, + "loss_iou": 0.2216796875, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 271359224, + "step": 2798 + }, + { + "epoch": 0.273660539694955, + "grad_norm": 5.719232393646574, + "learning_rate": 5e-05, + "loss": 0.0862, + "num_input_tokens_seen": 271455628, + "step": 2799 + }, + { + "epoch": 0.273660539694955, + "loss": 0.09624329209327698, + "loss_ce": 0.007887275889515877, + "loss_iou": 0.2412109375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 271455628, + "step": 2799 + }, + { + "epoch": 0.27375831052014077, + "grad_norm": 17.52124404636901, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 271551720, + "step": 2800 + }, + { + "epoch": 0.27375831052014077, + "loss": 0.06792883574962616, + "loss_ce": 0.003544371109455824, + "loss_iou": 0.3125, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 271551720, + "step": 2800 + }, + { + "epoch": 0.2738560813453266, + "grad_norm": 1.4917494416123762, + "learning_rate": 5e-05, + "loss": 0.0972, + "num_input_tokens_seen": 271647920, + "step": 2801 + }, + { + "epoch": 0.2738560813453266, + "loss": 0.06813886016607285, + "loss_ce": 0.005852485075592995, + "loss_iou": 0.345703125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 271647920, + "step": 2801 + }, + { + "epoch": 0.2739538521705123, + "grad_norm": 8.818832399387382, + "learning_rate": 5e-05, + "loss": 0.0863, + "num_input_tokens_seen": 271744744, + "step": 2802 + }, + { + "epoch": 0.2739538521705123, + "loss": 0.07945425808429718, + "loss_ce": 0.006006073672324419, + "loss_iou": 0.3828125, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 271744744, + "step": 2802 + }, + { + "epoch": 0.2740516229956981, + "grad_norm": 9.632670610587983, + "learning_rate": 5e-05, + "loss": 0.121, + "num_input_tokens_seen": 271841728, + "step": 2803 + }, + { + "epoch": 0.2740516229956981, + "loss": 0.10621330142021179, + "loss_ce": 0.0052001141011714935, + "loss_iou": 0.3125, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 271841728, + "step": 2803 + }, + { + "epoch": 0.2741493938208838, + "grad_norm": 7.7145209974494175, + "learning_rate": 5e-05, + "loss": 0.0923, + "num_input_tokens_seen": 271939104, + "step": 2804 + }, + { + "epoch": 0.2741493938208838, + "loss": 0.10007932037115097, + "loss_ce": 0.004833956249058247, + "loss_iou": 0.330078125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 271939104, + "step": 2804 + }, + { + "epoch": 0.27424716464606963, + "grad_norm": 9.23043797765456, + "learning_rate": 5e-05, + "loss": 0.1127, + "num_input_tokens_seen": 272034924, + "step": 2805 + }, + { + "epoch": 0.27424716464606963, + "loss": 0.07652340084314346, + "loss_ce": 0.010948757641017437, + "loss_iou": 0.2060546875, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 272034924, + "step": 2805 + }, + { + "epoch": 0.2743449354712554, + "grad_norm": 21.727044830386497, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 272131828, + "step": 2806 + }, + { + "epoch": 0.2743449354712554, + "loss": 0.07944740355014801, + "loss_ce": 0.0023752576671540737, + "loss_iou": 0.365234375, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 272131828, + "step": 2806 + }, + { + "epoch": 0.27444270629644113, + "grad_norm": 12.953653158589654, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 272228092, + "step": 2807 + }, + { + "epoch": 0.27444270629644113, + "loss": 0.07254058122634888, + "loss_ce": 0.004700008779764175, + "loss_iou": 0.302734375, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 272228092, + "step": 2807 + }, + { + "epoch": 0.2745404771216269, + "grad_norm": 10.505012930218033, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 272324524, + "step": 2808 + }, + { + "epoch": 0.2745404771216269, + "loss": 0.09994256496429443, + "loss_ce": 0.005460144951939583, + "loss_iou": 0.25390625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 272324524, + "step": 2808 + }, + { + "epoch": 0.2746382479468127, + "grad_norm": 13.448864460424039, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 272422356, + "step": 2809 + }, + { + "epoch": 0.2746382479468127, + "loss": 0.10254205763339996, + "loss_ce": 0.0026275096461176872, + "loss_iou": 0.43359375, + "loss_num": 0.0198974609375, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 272422356, + "step": 2809 + }, + { + "epoch": 0.27473601877199844, + "grad_norm": 33.230704414745865, + "learning_rate": 5e-05, + "loss": 0.1104, + "num_input_tokens_seen": 272519052, + "step": 2810 + }, + { + "epoch": 0.27473601877199844, + "loss": 0.11369079351425171, + "loss_ce": 0.0064215101301670074, + "loss_iou": 0.40625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 272519052, + "step": 2810 + }, + { + "epoch": 0.2748337895971842, + "grad_norm": 5.442840962732514, + "learning_rate": 5e-05, + "loss": 0.112, + "num_input_tokens_seen": 272616388, + "step": 2811 + }, + { + "epoch": 0.2748337895971842, + "loss": 0.11371710896492004, + "loss_ce": 0.008378060534596443, + "loss_iou": 0.35546875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 272616388, + "step": 2811 + }, + { + "epoch": 0.27493156042237, + "grad_norm": 8.937260119706478, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 272713416, + "step": 2812 + }, + { + "epoch": 0.27493156042237, + "loss": 0.07992972433567047, + "loss_ce": 0.006222149357199669, + "loss_iou": 0.296875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 272713416, + "step": 2812 + }, + { + "epoch": 0.27502933124755574, + "grad_norm": 6.6276104844214245, + "learning_rate": 5e-05, + "loss": 0.1434, + "num_input_tokens_seen": 272810160, + "step": 2813 + }, + { + "epoch": 0.27502933124755574, + "loss": 0.17232945561408997, + "loss_ce": 0.004009762778878212, + "loss_iou": 0.283203125, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 272810160, + "step": 2813 + }, + { + "epoch": 0.2751271020727415, + "grad_norm": 6.518358850324884, + "learning_rate": 5e-05, + "loss": 0.0875, + "num_input_tokens_seen": 272906736, + "step": 2814 + }, + { + "epoch": 0.2751271020727415, + "loss": 0.0852765440940857, + "loss_ce": 0.0030316724441945553, + "loss_iou": 0.2890625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 272906736, + "step": 2814 + }, + { + "epoch": 0.27522487289792724, + "grad_norm": 4.262410944688486, + "learning_rate": 5e-05, + "loss": 0.0998, + "num_input_tokens_seen": 273002928, + "step": 2815 + }, + { + "epoch": 0.27522487289792724, + "loss": 0.10391522943973541, + "loss_ce": 0.00321485148742795, + "loss_iou": 0.267578125, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 273002928, + "step": 2815 + }, + { + "epoch": 0.27532264372311305, + "grad_norm": 6.116734239674827, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 273100320, + "step": 2816 + }, + { + "epoch": 0.27532264372311305, + "loss": 0.09664566814899445, + "loss_ce": 0.007198653649538755, + "loss_iou": 0.349609375, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 273100320, + "step": 2816 + }, + { + "epoch": 0.2754204145482988, + "grad_norm": 13.77269572789522, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 273197212, + "step": 2817 + }, + { + "epoch": 0.2754204145482988, + "loss": 0.05380290001630783, + "loss_ce": 0.006910732015967369, + "loss_iou": 0.29296875, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 273197212, + "step": 2817 + }, + { + "epoch": 0.27551818537348455, + "grad_norm": 2.227695665925446, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 273293900, + "step": 2818 + }, + { + "epoch": 0.27551818537348455, + "loss": 0.07343241572380066, + "loss_ce": 0.00453898636624217, + "loss_iou": 0.2734375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 273293900, + "step": 2818 + }, + { + "epoch": 0.2756159561986703, + "grad_norm": 5.447988154121383, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 273390656, + "step": 2819 + }, + { + "epoch": 0.2756159561986703, + "loss": 0.06344451010227203, + "loss_ce": 0.00410307664424181, + "loss_iou": 0.287109375, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 273390656, + "step": 2819 + }, + { + "epoch": 0.2757137270238561, + "grad_norm": 2.6736142671147207, + "learning_rate": 5e-05, + "loss": 0.1121, + "num_input_tokens_seen": 273487804, + "step": 2820 + }, + { + "epoch": 0.2757137270238561, + "loss": 0.1160447895526886, + "loss_ce": 0.009126454591751099, + "loss_iou": 0.287109375, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 273487804, + "step": 2820 + }, + { + "epoch": 0.27581149784904185, + "grad_norm": 4.809335319514149, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 273584796, + "step": 2821 + }, + { + "epoch": 0.27581149784904185, + "loss": 0.07247066497802734, + "loss_ce": 0.004752154462039471, + "loss_iou": 0.287109375, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 273584796, + "step": 2821 + }, + { + "epoch": 0.2759092686742276, + "grad_norm": 9.982650431763611, + "learning_rate": 5e-05, + "loss": 0.0978, + "num_input_tokens_seen": 273682436, + "step": 2822 + }, + { + "epoch": 0.2759092686742276, + "loss": 0.10406336188316345, + "loss_ce": 0.004576053470373154, + "loss_iou": 0.2431640625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 273682436, + "step": 2822 + }, + { + "epoch": 0.27600703949941335, + "grad_norm": 22.65813102233434, + "learning_rate": 5e-05, + "loss": 0.1154, + "num_input_tokens_seen": 273779556, + "step": 2823 + }, + { + "epoch": 0.27600703949941335, + "loss": 0.1323850452899933, + "loss_ce": 0.006896770093590021, + "loss_iou": 0.328125, + "loss_num": 0.025146484375, + "loss_xval": 0.125, + "num_input_tokens_seen": 273779556, + "step": 2823 + }, + { + "epoch": 0.27610481032459916, + "grad_norm": 6.759273940091904, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 273876920, + "step": 2824 + }, + { + "epoch": 0.27610481032459916, + "loss": 0.05220365524291992, + "loss_ce": 0.00198697904124856, + "loss_iou": 0.37890625, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 273876920, + "step": 2824 + }, + { + "epoch": 0.2762025811497849, + "grad_norm": 22.463367087679234, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 273974048, + "step": 2825 + }, + { + "epoch": 0.2762025811497849, + "loss": 0.10017865896224976, + "loss_ce": 0.003300604410469532, + "loss_iou": 0.306640625, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 273974048, + "step": 2825 + }, + { + "epoch": 0.27630035197497066, + "grad_norm": 16.666721287860458, + "learning_rate": 5e-05, + "loss": 0.133, + "num_input_tokens_seen": 274070656, + "step": 2826 + }, + { + "epoch": 0.27630035197497066, + "loss": 0.10954055935144424, + "loss_ce": 0.004254914354532957, + "loss_iou": 0.31640625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 274070656, + "step": 2826 + }, + { + "epoch": 0.2763981228001564, + "grad_norm": 24.468897620830774, + "learning_rate": 5e-05, + "loss": 0.0991, + "num_input_tokens_seen": 274168060, + "step": 2827 + }, + { + "epoch": 0.2763981228001564, + "loss": 0.08619514107704163, + "loss_ce": 0.0037976750172674656, + "loss_iou": 0.337890625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 274168060, + "step": 2827 + }, + { + "epoch": 0.2764958936253422, + "grad_norm": 13.03917656907379, + "learning_rate": 5e-05, + "loss": 0.0775, + "num_input_tokens_seen": 274264664, + "step": 2828 + }, + { + "epoch": 0.2764958936253422, + "loss": 0.055404651910066605, + "loss_ce": 0.005439746659249067, + "loss_iou": 0.314453125, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 274264664, + "step": 2828 + }, + { + "epoch": 0.27659366445052797, + "grad_norm": 7.01287634425742, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 274361920, + "step": 2829 + }, + { + "epoch": 0.27659366445052797, + "loss": 0.06814536452293396, + "loss_ce": 0.0028682672418653965, + "loss_iou": 0.26953125, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 274361920, + "step": 2829 + }, + { + "epoch": 0.2766914352757137, + "grad_norm": 7.52219808150921, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 274458408, + "step": 2830 + }, + { + "epoch": 0.2766914352757137, + "loss": 0.05563335120677948, + "loss_ce": 0.002212329301983118, + "loss_iou": 0.3046875, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 274458408, + "step": 2830 + }, + { + "epoch": 0.27678920610089947, + "grad_norm": 11.898930368356895, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 274554860, + "step": 2831 + }, + { + "epoch": 0.27678920610089947, + "loss": 0.06441693753004074, + "loss_ce": 0.00302319647744298, + "loss_iou": 0.27734375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 274554860, + "step": 2831 + }, + { + "epoch": 0.27688697692608527, + "grad_norm": 4.436022480022074, + "learning_rate": 5e-05, + "loss": 0.123, + "num_input_tokens_seen": 274650792, + "step": 2832 + }, + { + "epoch": 0.27688697692608527, + "loss": 0.07579346746206284, + "loss_ce": 0.004000865388661623, + "loss_iou": 0.380859375, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 274650792, + "step": 2832 + }, + { + "epoch": 0.276984747751271, + "grad_norm": 5.2546853026532405, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 274748500, + "step": 2833 + }, + { + "epoch": 0.276984747751271, + "loss": 0.08401792496442795, + "loss_ce": 0.005328349769115448, + "loss_iou": 0.275390625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 274748500, + "step": 2833 + }, + { + "epoch": 0.27708251857645677, + "grad_norm": 6.510611735050618, + "learning_rate": 5e-05, + "loss": 0.0932, + "num_input_tokens_seen": 274846148, + "step": 2834 + }, + { + "epoch": 0.27708251857645677, + "loss": 0.1036158874630928, + "loss_ce": 0.008645184338092804, + "loss_iou": 0.44921875, + "loss_num": 0.01904296875, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 274846148, + "step": 2834 + }, + { + "epoch": 0.2771802894016426, + "grad_norm": 30.22357644376808, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 274942564, + "step": 2835 + }, + { + "epoch": 0.2771802894016426, + "loss": 0.06770014017820358, + "loss_ce": 0.005459538195282221, + "loss_iou": 0.298828125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 274942564, + "step": 2835 + }, + { + "epoch": 0.2772780602268283, + "grad_norm": 13.83548468163319, + "learning_rate": 5e-05, + "loss": 0.0984, + "num_input_tokens_seen": 275040052, + "step": 2836 + }, + { + "epoch": 0.2772780602268283, + "loss": 0.06312546133995056, + "loss_ce": 0.0029753129929304123, + "loss_iou": 0.427734375, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 275040052, + "step": 2836 + }, + { + "epoch": 0.2773758310520141, + "grad_norm": 2.3958639730815166, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 275137280, + "step": 2837 + }, + { + "epoch": 0.2773758310520141, + "loss": 0.06069234013557434, + "loss_ce": 0.005569965112954378, + "loss_iou": 0.322265625, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 275137280, + "step": 2837 + }, + { + "epoch": 0.2774736018771998, + "grad_norm": 14.273351189969233, + "learning_rate": 5e-05, + "loss": 0.157, + "num_input_tokens_seen": 275233540, + "step": 2838 + }, + { + "epoch": 0.2774736018771998, + "loss": 0.1533312052488327, + "loss_ce": 0.0065416572615504265, + "loss_iou": 0.365234375, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 275233540, + "step": 2838 + }, + { + "epoch": 0.27757137270238563, + "grad_norm": 23.517162080312854, + "learning_rate": 5e-05, + "loss": 0.0931, + "num_input_tokens_seen": 275329736, + "step": 2839 + }, + { + "epoch": 0.27757137270238563, + "loss": 0.05400582402944565, + "loss_ce": 0.004094327799975872, + "loss_iou": 0.369140625, + "loss_num": 0.010009765625, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 275329736, + "step": 2839 + }, + { + "epoch": 0.2776691435275714, + "grad_norm": 11.266921242412904, + "learning_rate": 5e-05, + "loss": 0.0518, + "num_input_tokens_seen": 275427060, + "step": 2840 + }, + { + "epoch": 0.2776691435275714, + "loss": 0.03542913496494293, + "loss_ce": 0.005155694670975208, + "loss_iou": 0.22265625, + "loss_num": 0.00604248046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 275427060, + "step": 2840 + }, + { + "epoch": 0.27776691435275713, + "grad_norm": 17.1548071484689, + "learning_rate": 5e-05, + "loss": 0.1025, + "num_input_tokens_seen": 275524244, + "step": 2841 + }, + { + "epoch": 0.27776691435275713, + "loss": 0.14134977757930756, + "loss_ce": 0.005546558648347855, + "loss_iou": 0.373046875, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 275524244, + "step": 2841 + }, + { + "epoch": 0.2778646851779429, + "grad_norm": 13.782919909223699, + "learning_rate": 5e-05, + "loss": 0.1299, + "num_input_tokens_seen": 275621088, + "step": 2842 + }, + { + "epoch": 0.2778646851779429, + "loss": 0.12237799912691116, + "loss_ce": 0.003374699968844652, + "loss_iou": 0.345703125, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 275621088, + "step": 2842 + }, + { + "epoch": 0.2779624560031287, + "grad_norm": 6.024956311371647, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 275717604, + "step": 2843 + }, + { + "epoch": 0.2779624560031287, + "loss": 0.1397942304611206, + "loss_ce": 0.006783359684050083, + "loss_iou": 0.2734375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 275717604, + "step": 2843 + }, + { + "epoch": 0.27806022682831444, + "grad_norm": 2.7150286396128975, + "learning_rate": 5e-05, + "loss": 0.1006, + "num_input_tokens_seen": 275814116, + "step": 2844 + }, + { + "epoch": 0.27806022682831444, + "loss": 0.10112946480512619, + "loss_ce": 0.008340765722095966, + "loss_iou": 0.2158203125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 275814116, + "step": 2844 + }, + { + "epoch": 0.2781579976535002, + "grad_norm": 7.6578582428734325, + "learning_rate": 5e-05, + "loss": 0.0758, + "num_input_tokens_seen": 275910960, + "step": 2845 + }, + { + "epoch": 0.2781579976535002, + "loss": 0.08085770905017853, + "loss_ce": 0.003659677691757679, + "loss_iou": 0.2177734375, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 275910960, + "step": 2845 + }, + { + "epoch": 0.27825576847868594, + "grad_norm": 9.994330844471767, + "learning_rate": 5e-05, + "loss": 0.0638, + "num_input_tokens_seen": 276007772, + "step": 2846 + }, + { + "epoch": 0.27825576847868594, + "loss": 0.05559761822223663, + "loss_ce": 0.005045249592512846, + "loss_iou": 0.341796875, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 276007772, + "step": 2846 + }, + { + "epoch": 0.27835353930387174, + "grad_norm": 37.41300395951545, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 276105808, + "step": 2847 + }, + { + "epoch": 0.27835353930387174, + "loss": 0.10024410486221313, + "loss_ce": 0.005815093405544758, + "loss_iou": 0.291015625, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 276105808, + "step": 2847 + }, + { + "epoch": 0.2784513101290575, + "grad_norm": 19.720157186618295, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 276202448, + "step": 2848 + }, + { + "epoch": 0.2784513101290575, + "loss": 0.11036719381809235, + "loss_ce": 0.004791623912751675, + "loss_iou": 0.390625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 276202448, + "step": 2848 + }, + { + "epoch": 0.27854908095424324, + "grad_norm": 8.633458145576489, + "learning_rate": 5e-05, + "loss": 0.0681, + "num_input_tokens_seen": 276299376, + "step": 2849 + }, + { + "epoch": 0.27854908095424324, + "loss": 0.07246247678995132, + "loss_ce": 0.007399001158773899, + "loss_iou": 0.32421875, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 276299376, + "step": 2849 + }, + { + "epoch": 0.278646851779429, + "grad_norm": 14.457180371018321, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 276395204, + "step": 2850 + }, + { + "epoch": 0.278646851779429, + "loss": 0.06777802109718323, + "loss_ce": 0.0029586879536509514, + "loss_iou": 0.25, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 276395204, + "step": 2850 + }, + { + "epoch": 0.2787446226046148, + "grad_norm": 10.003347581873777, + "learning_rate": 5e-05, + "loss": 0.0892, + "num_input_tokens_seen": 276491520, + "step": 2851 + }, + { + "epoch": 0.2787446226046148, + "loss": 0.06788679957389832, + "loss_ce": 0.003845658153295517, + "loss_iou": 0.41015625, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 276491520, + "step": 2851 + }, + { + "epoch": 0.27884239342980055, + "grad_norm": 8.454126390769801, + "learning_rate": 5e-05, + "loss": 0.115, + "num_input_tokens_seen": 276587236, + "step": 2852 + }, + { + "epoch": 0.27884239342980055, + "loss": 0.08757282793521881, + "loss_ce": 0.00889851339161396, + "loss_iou": 0.185546875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 276587236, + "step": 2852 + }, + { + "epoch": 0.2789401642549863, + "grad_norm": 23.67769744416686, + "learning_rate": 5e-05, + "loss": 0.0726, + "num_input_tokens_seen": 276684876, + "step": 2853 + }, + { + "epoch": 0.2789401642549863, + "loss": 0.07941722124814987, + "loss_ce": 0.003352157771587372, + "loss_iou": 0.330078125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 276684876, + "step": 2853 + }, + { + "epoch": 0.27903793508017205, + "grad_norm": 3.5864987509198696, + "learning_rate": 5e-05, + "loss": 0.0526, + "num_input_tokens_seen": 276782400, + "step": 2854 + }, + { + "epoch": 0.27903793508017205, + "loss": 0.03773590177297592, + "loss_ce": 0.0017556753009557724, + "loss_iou": 0.390625, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 276782400, + "step": 2854 + }, + { + "epoch": 0.27913570590535786, + "grad_norm": 6.539600369047447, + "learning_rate": 5e-05, + "loss": 0.0913, + "num_input_tokens_seen": 276878960, + "step": 2855 + }, + { + "epoch": 0.27913570590535786, + "loss": 0.09990808367729187, + "loss_ce": 0.007447442039847374, + "loss_iou": 0.2431640625, + "loss_num": 0.0185546875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 276878960, + "step": 2855 + }, + { + "epoch": 0.2792334767305436, + "grad_norm": 11.202786468513818, + "learning_rate": 5e-05, + "loss": 0.1301, + "num_input_tokens_seen": 276975124, + "step": 2856 + }, + { + "epoch": 0.2792334767305436, + "loss": 0.09473992884159088, + "loss_ce": 0.005369193851947784, + "loss_iou": 0.275390625, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 276975124, + "step": 2856 + }, + { + "epoch": 0.27933124755572936, + "grad_norm": 5.153603544528933, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 277072440, + "step": 2857 + }, + { + "epoch": 0.27933124755572936, + "loss": 0.06889832764863968, + "loss_ce": 0.005360729526728392, + "loss_iou": 0.283203125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 277072440, + "step": 2857 + }, + { + "epoch": 0.27942901838091516, + "grad_norm": 16.58600439708648, + "learning_rate": 5e-05, + "loss": 0.0961, + "num_input_tokens_seen": 277169332, + "step": 2858 + }, + { + "epoch": 0.27942901838091516, + "loss": 0.07000026106834412, + "loss_ce": 0.004021252039819956, + "loss_iou": 0.41796875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 277169332, + "step": 2858 + }, + { + "epoch": 0.2795267892061009, + "grad_norm": 21.100391415647326, + "learning_rate": 5e-05, + "loss": 0.1002, + "num_input_tokens_seen": 277266068, + "step": 2859 + }, + { + "epoch": 0.2795267892061009, + "loss": 0.10759030282497406, + "loss_ce": 0.00500546395778656, + "loss_iou": 0.375, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 277266068, + "step": 2859 + }, + { + "epoch": 0.27962456003128666, + "grad_norm": 6.730432359261116, + "learning_rate": 5e-05, + "loss": 0.0933, + "num_input_tokens_seen": 277363288, + "step": 2860 + }, + { + "epoch": 0.27962456003128666, + "loss": 0.0826491266489029, + "loss_ce": 0.0041121444664895535, + "loss_iou": 0.443359375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 277363288, + "step": 2860 + }, + { + "epoch": 0.2797223308564724, + "grad_norm": 4.454715968243574, + "learning_rate": 5e-05, + "loss": 0.0999, + "num_input_tokens_seen": 277460868, + "step": 2861 + }, + { + "epoch": 0.2797223308564724, + "loss": 0.13478946685791016, + "loss_ce": 0.0030145556665956974, + "loss_iou": 0.484375, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 277460868, + "step": 2861 + }, + { + "epoch": 0.2798201016816582, + "grad_norm": 11.893399636634424, + "learning_rate": 5e-05, + "loss": 0.0847, + "num_input_tokens_seen": 277558280, + "step": 2862 + }, + { + "epoch": 0.2798201016816582, + "loss": 0.07247485220432281, + "loss_ce": 0.0028184850234538317, + "loss_iou": 0.486328125, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 277558280, + "step": 2862 + }, + { + "epoch": 0.27991787250684397, + "grad_norm": 18.594652249424204, + "learning_rate": 5e-05, + "loss": 0.0976, + "num_input_tokens_seen": 277655200, + "step": 2863 + }, + { + "epoch": 0.27991787250684397, + "loss": 0.10457779467105865, + "loss_ce": 0.006280675996094942, + "loss_iou": 0.302734375, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 277655200, + "step": 2863 + }, + { + "epoch": 0.2800156433320297, + "grad_norm": 17.28714491369428, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 277752516, + "step": 2864 + }, + { + "epoch": 0.2800156433320297, + "loss": 0.08062388002872467, + "loss_ce": 0.005367531441152096, + "loss_iou": 0.40625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 277752516, + "step": 2864 + }, + { + "epoch": 0.28011341415721547, + "grad_norm": 23.492582814546314, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 277849396, + "step": 2865 + }, + { + "epoch": 0.28011341415721547, + "loss": 0.08851126581430435, + "loss_ce": 0.008021153509616852, + "loss_iou": 0.296875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 277849396, + "step": 2865 + }, + { + "epoch": 0.2802111849824013, + "grad_norm": 14.487375919984547, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 277946296, + "step": 2866 + }, + { + "epoch": 0.2802111849824013, + "loss": 0.07364624738693237, + "loss_ce": 0.0071179307997226715, + "loss_iou": 0.302734375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 277946296, + "step": 2866 + }, + { + "epoch": 0.280308955807587, + "grad_norm": 19.163730138540295, + "learning_rate": 5e-05, + "loss": 0.1151, + "num_input_tokens_seen": 278042888, + "step": 2867 + }, + { + "epoch": 0.280308955807587, + "loss": 0.10856548696756363, + "loss_ce": 0.004897271282970905, + "loss_iou": 0.271484375, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 278042888, + "step": 2867 + }, + { + "epoch": 0.2804067266327728, + "grad_norm": 12.329206509423189, + "learning_rate": 5e-05, + "loss": 0.1131, + "num_input_tokens_seen": 278139740, + "step": 2868 + }, + { + "epoch": 0.2804067266327728, + "loss": 0.14694620668888092, + "loss_ce": 0.00787760503590107, + "loss_iou": 0.34765625, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 278139740, + "step": 2868 + }, + { + "epoch": 0.2805044974579585, + "grad_norm": 11.711077522738544, + "learning_rate": 5e-05, + "loss": 0.1051, + "num_input_tokens_seen": 278236936, + "step": 2869 + }, + { + "epoch": 0.2805044974579585, + "loss": 0.09680241346359253, + "loss_ce": 0.0032965531572699547, + "loss_iou": 0.31640625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 278236936, + "step": 2869 + }, + { + "epoch": 0.28060226828314433, + "grad_norm": 11.803499721547748, + "learning_rate": 5e-05, + "loss": 0.0595, + "num_input_tokens_seen": 278333612, + "step": 2870 + }, + { + "epoch": 0.28060226828314433, + "loss": 0.054893653839826584, + "loss_ce": 0.002998512936756015, + "loss_iou": 0.3359375, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 278333612, + "step": 2870 + }, + { + "epoch": 0.2807000391083301, + "grad_norm": 8.593311193925771, + "learning_rate": 5e-05, + "loss": 0.0965, + "num_input_tokens_seen": 278430804, + "step": 2871 + }, + { + "epoch": 0.2807000391083301, + "loss": 0.09460017830133438, + "loss_ce": 0.003840906545519829, + "loss_iou": 0.40234375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 278430804, + "step": 2871 + }, + { + "epoch": 0.28079780993351583, + "grad_norm": 3.312694552774535, + "learning_rate": 5e-05, + "loss": 0.0675, + "num_input_tokens_seen": 278528528, + "step": 2872 + }, + { + "epoch": 0.28079780993351583, + "loss": 0.07175210118293762, + "loss_ce": 0.00728371599689126, + "loss_iou": 0.326171875, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 278528528, + "step": 2872 + }, + { + "epoch": 0.2808955807587016, + "grad_norm": 3.9777117913444004, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 278625256, + "step": 2873 + }, + { + "epoch": 0.2808955807587016, + "loss": 0.07919661700725555, + "loss_ce": 0.0023152099456638098, + "loss_iou": 0.30078125, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 278625256, + "step": 2873 + }, + { + "epoch": 0.2809933515838874, + "grad_norm": 11.72996116267319, + "learning_rate": 5e-05, + "loss": 0.094, + "num_input_tokens_seen": 278721920, + "step": 2874 + }, + { + "epoch": 0.2809933515838874, + "loss": 0.12244880199432373, + "loss_ce": 0.0080231549218297, + "loss_iou": 0.388671875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 278721920, + "step": 2874 + }, + { + "epoch": 0.28109112240907314, + "grad_norm": 47.08642344047845, + "learning_rate": 5e-05, + "loss": 0.1002, + "num_input_tokens_seen": 278819444, + "step": 2875 + }, + { + "epoch": 0.28109112240907314, + "loss": 0.09393934160470963, + "loss_ce": 0.004065076820552349, + "loss_iou": 0.353515625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 278819444, + "step": 2875 + }, + { + "epoch": 0.2811888932342589, + "grad_norm": 7.870331314302565, + "learning_rate": 5e-05, + "loss": 0.1056, + "num_input_tokens_seen": 278916544, + "step": 2876 + }, + { + "epoch": 0.2811888932342589, + "loss": 0.10811921954154968, + "loss_ce": 0.005915849469602108, + "loss_iou": 0.3359375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 278916544, + "step": 2876 + }, + { + "epoch": 0.28128666405944464, + "grad_norm": 20.090711861569382, + "learning_rate": 5e-05, + "loss": 0.0854, + "num_input_tokens_seen": 279014664, + "step": 2877 + }, + { + "epoch": 0.28128666405944464, + "loss": 0.09908893704414368, + "loss_ce": 0.004209781531244516, + "loss_iou": 0.35546875, + "loss_num": 0.01904296875, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 279014664, + "step": 2877 + }, + { + "epoch": 0.28138443488463044, + "grad_norm": 12.746048571075475, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 279111652, + "step": 2878 + }, + { + "epoch": 0.28138443488463044, + "loss": 0.0386238619685173, + "loss_ce": 0.0016289281193166971, + "loss_iou": 0.361328125, + "loss_num": 0.00738525390625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 279111652, + "step": 2878 + }, + { + "epoch": 0.2814822057098162, + "grad_norm": 10.90682066068508, + "learning_rate": 5e-05, + "loss": 0.1031, + "num_input_tokens_seen": 279208228, + "step": 2879 + }, + { + "epoch": 0.2814822057098162, + "loss": 0.08390174806118011, + "loss_ce": 0.005486832000315189, + "loss_iou": 0.2734375, + "loss_num": 0.015625, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 279208228, + "step": 2879 + }, + { + "epoch": 0.28157997653500194, + "grad_norm": 5.39308791115985, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 279305140, + "step": 2880 + }, + { + "epoch": 0.28157997653500194, + "loss": 0.10173936933279037, + "loss_ce": 0.012399157509207726, + "loss_iou": 0.3359375, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 279305140, + "step": 2880 + }, + { + "epoch": 0.28167774736018775, + "grad_norm": 5.014705133523134, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 279401876, + "step": 2881 + }, + { + "epoch": 0.28167774736018775, + "loss": 0.10315966606140137, + "loss_ce": 0.008311033248901367, + "loss_iou": 0.2451171875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 279401876, + "step": 2881 + }, + { + "epoch": 0.2817755181853735, + "grad_norm": 12.110108111209687, + "learning_rate": 5e-05, + "loss": 0.1114, + "num_input_tokens_seen": 279498308, + "step": 2882 + }, + { + "epoch": 0.2817755181853735, + "loss": 0.08747729659080505, + "loss_ce": 0.005423163063824177, + "loss_iou": 0.40234375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 279498308, + "step": 2882 + }, + { + "epoch": 0.28187328901055925, + "grad_norm": 27.432875774561914, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 279596144, + "step": 2883 + }, + { + "epoch": 0.28187328901055925, + "loss": 0.0779886469244957, + "loss_ce": 0.0019464722136035562, + "loss_iou": 0.31640625, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 279596144, + "step": 2883 + }, + { + "epoch": 0.281971059835745, + "grad_norm": 13.525507210732892, + "learning_rate": 5e-05, + "loss": 0.1081, + "num_input_tokens_seen": 279693872, + "step": 2884 + }, + { + "epoch": 0.281971059835745, + "loss": 0.12456765025854111, + "loss_ce": 0.0016275802627205849, + "loss_iou": 0.4140625, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 279693872, + "step": 2884 + }, + { + "epoch": 0.2820688306609308, + "grad_norm": 21.061191871157902, + "learning_rate": 5e-05, + "loss": 0.1169, + "num_input_tokens_seen": 279791028, + "step": 2885 + }, + { + "epoch": 0.2820688306609308, + "loss": 0.11310992389917374, + "loss_ce": 0.005413387902081013, + "loss_iou": 0.431640625, + "loss_num": 0.021484375, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 279791028, + "step": 2885 + }, + { + "epoch": 0.28216660148611655, + "grad_norm": 22.96911299986217, + "learning_rate": 5e-05, + "loss": 0.1173, + "num_input_tokens_seen": 279889320, + "step": 2886 + }, + { + "epoch": 0.28216660148611655, + "loss": 0.1002797782421112, + "loss_ce": 0.0037984580267220736, + "loss_iou": 0.39453125, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 279889320, + "step": 2886 + }, + { + "epoch": 0.2822643723113023, + "grad_norm": 13.623018628506083, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 279986620, + "step": 2887 + }, + { + "epoch": 0.2822643723113023, + "loss": 0.06704196333885193, + "loss_ce": 0.006601900793612003, + "loss_iou": 0.2734375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 279986620, + "step": 2887 + }, + { + "epoch": 0.28236214313648805, + "grad_norm": 22.521300622723306, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 280082644, + "step": 2888 + }, + { + "epoch": 0.28236214313648805, + "loss": 0.11589915305376053, + "loss_ce": 0.006569926626980305, + "loss_iou": 0.353515625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 280082644, + "step": 2888 + }, + { + "epoch": 0.28245991396167386, + "grad_norm": 30.34261751848671, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 280179168, + "step": 2889 + }, + { + "epoch": 0.28245991396167386, + "loss": 0.07792708277702332, + "loss_ce": 0.005447830073535442, + "loss_iou": 0.2412109375, + "loss_num": 0.0145263671875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 280179168, + "step": 2889 + }, + { + "epoch": 0.2825576847868596, + "grad_norm": 20.805831270418707, + "learning_rate": 5e-05, + "loss": 0.1152, + "num_input_tokens_seen": 280276020, + "step": 2890 + }, + { + "epoch": 0.2825576847868596, + "loss": 0.10547760128974915, + "loss_ce": 0.0024044890888035297, + "loss_iou": 0.380859375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 280276020, + "step": 2890 + }, + { + "epoch": 0.28265545561204536, + "grad_norm": 8.924531607748312, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 280372152, + "step": 2891 + }, + { + "epoch": 0.28265545561204536, + "loss": 0.07014094293117523, + "loss_ce": 0.001979929395020008, + "loss_iou": 0.3359375, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 280372152, + "step": 2891 + }, + { + "epoch": 0.2827532264372311, + "grad_norm": 14.733353410881122, + "learning_rate": 5e-05, + "loss": 0.1147, + "num_input_tokens_seen": 280469132, + "step": 2892 + }, + { + "epoch": 0.2827532264372311, + "loss": 0.11886191368103027, + "loss_ce": 0.007106548175215721, + "loss_iou": 0.306640625, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 280469132, + "step": 2892 + }, + { + "epoch": 0.2828509972624169, + "grad_norm": 5.902861927943977, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 280566256, + "step": 2893 + }, + { + "epoch": 0.2828509972624169, + "loss": 0.07275575399398804, + "loss_ce": 0.003648701123893261, + "loss_iou": 0.248046875, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 280566256, + "step": 2893 + }, + { + "epoch": 0.28294876808760266, + "grad_norm": 8.070343196484318, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 280662668, + "step": 2894 + }, + { + "epoch": 0.28294876808760266, + "loss": 0.07545798271894455, + "loss_ce": 0.003043584991246462, + "loss_iou": 0.38671875, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 280662668, + "step": 2894 + }, + { + "epoch": 0.2830465389127884, + "grad_norm": 11.610383642465559, + "learning_rate": 5e-05, + "loss": 0.0772, + "num_input_tokens_seen": 280759672, + "step": 2895 + }, + { + "epoch": 0.2830465389127884, + "loss": 0.08033104240894318, + "loss_ce": 0.0027248440310359, + "loss_iou": 0.3359375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 280759672, + "step": 2895 + }, + { + "epoch": 0.28314430973797416, + "grad_norm": 13.185757247667324, + "learning_rate": 5e-05, + "loss": 0.1154, + "num_input_tokens_seen": 280855908, + "step": 2896 + }, + { + "epoch": 0.28314430973797416, + "loss": 0.1329152137041092, + "loss_ce": 0.007381154224276543, + "loss_iou": 0.294921875, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 280855908, + "step": 2896 + }, + { + "epoch": 0.28324208056315997, + "grad_norm": 28.10680463713813, + "learning_rate": 5e-05, + "loss": 0.0571, + "num_input_tokens_seen": 280952444, + "step": 2897 + }, + { + "epoch": 0.28324208056315997, + "loss": 0.05538608878850937, + "loss_ce": 0.00559666333720088, + "loss_iou": 0.2490234375, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 280952444, + "step": 2897 + }, + { + "epoch": 0.2833398513883457, + "grad_norm": 18.628240279244196, + "learning_rate": 5e-05, + "loss": 0.1025, + "num_input_tokens_seen": 281049164, + "step": 2898 + }, + { + "epoch": 0.2833398513883457, + "loss": 0.09193428605794907, + "loss_ce": 0.004394617397338152, + "loss_iou": 0.35546875, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 281049164, + "step": 2898 + }, + { + "epoch": 0.28343762221353147, + "grad_norm": 11.799250071811757, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 281146544, + "step": 2899 + }, + { + "epoch": 0.28343762221353147, + "loss": 0.10539726912975311, + "loss_ce": 0.0019426890648901463, + "loss_iou": 0.341796875, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 281146544, + "step": 2899 + }, + { + "epoch": 0.2835353930387172, + "grad_norm": 13.986880604935664, + "learning_rate": 5e-05, + "loss": 0.1242, + "num_input_tokens_seen": 281243964, + "step": 2900 + }, + { + "epoch": 0.2835353930387172, + "loss": 0.13385874032974243, + "loss_ce": 0.006936132907867432, + "loss_iou": 0.314453125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 281243964, + "step": 2900 + }, + { + "epoch": 0.283633163863903, + "grad_norm": 21.787185041681106, + "learning_rate": 5e-05, + "loss": 0.0695, + "num_input_tokens_seen": 281340864, + "step": 2901 + }, + { + "epoch": 0.283633163863903, + "loss": 0.07443123310804367, + "loss_ce": 0.0061100018210709095, + "loss_iou": 0.33984375, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 281340864, + "step": 2901 + }, + { + "epoch": 0.2837309346890888, + "grad_norm": 15.80609620897729, + "learning_rate": 5e-05, + "loss": 0.1178, + "num_input_tokens_seen": 281436780, + "step": 2902 + }, + { + "epoch": 0.2837309346890888, + "loss": 0.10293562710285187, + "loss_ce": 0.008178547956049442, + "loss_iou": 0.287109375, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 281436780, + "step": 2902 + }, + { + "epoch": 0.2838287055142745, + "grad_norm": 9.77854346713113, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 281533056, + "step": 2903 + }, + { + "epoch": 0.2838287055142745, + "loss": 0.0996597558259964, + "loss_ce": 0.0085037462413311, + "loss_iou": 0.337890625, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 281533056, + "step": 2903 + }, + { + "epoch": 0.28392647633946033, + "grad_norm": 16.541047662601926, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 281629840, + "step": 2904 + }, + { + "epoch": 0.28392647633946033, + "loss": 0.10073017328977585, + "loss_ce": 0.008383981883525848, + "loss_iou": 0.263671875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 281629840, + "step": 2904 + }, + { + "epoch": 0.2840242471646461, + "grad_norm": 19.055476761712132, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 281726452, + "step": 2905 + }, + { + "epoch": 0.2840242471646461, + "loss": 0.08339732140302658, + "loss_ce": 0.004829819779843092, + "loss_iou": 0.216796875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 281726452, + "step": 2905 + }, + { + "epoch": 0.28412201798983183, + "grad_norm": 17.611062994227805, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 281823504, + "step": 2906 + }, + { + "epoch": 0.28412201798983183, + "loss": 0.10853596776723862, + "loss_ce": 0.003311351640149951, + "loss_iou": 0.330078125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 281823504, + "step": 2906 + }, + { + "epoch": 0.2842197888150176, + "grad_norm": 8.63458391146665, + "learning_rate": 5e-05, + "loss": 0.0763, + "num_input_tokens_seen": 281919932, + "step": 2907 + }, + { + "epoch": 0.2842197888150176, + "loss": 0.10572382807731628, + "loss_ce": 0.0033068391494452953, + "loss_iou": 0.29296875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 281919932, + "step": 2907 + }, + { + "epoch": 0.2843175596402034, + "grad_norm": 9.570784006336137, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 282016500, + "step": 2908 + }, + { + "epoch": 0.2843175596402034, + "loss": 0.05848946422338486, + "loss_ce": 0.001345298602245748, + "loss_iou": 0.41796875, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 282016500, + "step": 2908 + }, + { + "epoch": 0.28441533046538914, + "grad_norm": 13.06321672985584, + "learning_rate": 5e-05, + "loss": 0.1142, + "num_input_tokens_seen": 282113176, + "step": 2909 + }, + { + "epoch": 0.28441533046538914, + "loss": 0.14513634145259857, + "loss_ce": 0.010797964408993721, + "loss_iou": 0.296875, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 282113176, + "step": 2909 + }, + { + "epoch": 0.2845131012905749, + "grad_norm": 16.49381205368563, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 282209536, + "step": 2910 + }, + { + "epoch": 0.2845131012905749, + "loss": 0.05949592590332031, + "loss_ce": 0.0052814488299191, + "loss_iou": 0.275390625, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 282209536, + "step": 2910 + }, + { + "epoch": 0.28461087211576064, + "grad_norm": 8.678940957955875, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 282305684, + "step": 2911 + }, + { + "epoch": 0.28461087211576064, + "loss": 0.052536554634571075, + "loss_ce": 0.0047155097126960754, + "loss_iou": 0.173828125, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 282305684, + "step": 2911 + }, + { + "epoch": 0.28470864294094644, + "grad_norm": 10.305135317766435, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 282403004, + "step": 2912 + }, + { + "epoch": 0.28470864294094644, + "loss": 0.0794728547334671, + "loss_ce": 0.005246476270258427, + "loss_iou": 0.375, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 282403004, + "step": 2912 + }, + { + "epoch": 0.2848064137661322, + "grad_norm": 11.994484480418253, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 282500228, + "step": 2913 + }, + { + "epoch": 0.2848064137661322, + "loss": 0.058388784527778625, + "loss_ce": 0.003480032552033663, + "loss_iou": 0.296875, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 282500228, + "step": 2913 + }, + { + "epoch": 0.28490418459131794, + "grad_norm": 3.9729883044642125, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 282597232, + "step": 2914 + }, + { + "epoch": 0.28490418459131794, + "loss": 0.08809502422809601, + "loss_ce": 0.009725884534418583, + "loss_iou": 0.287109375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 282597232, + "step": 2914 + }, + { + "epoch": 0.2850019554165037, + "grad_norm": 4.55271534843257, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 282693892, + "step": 2915 + }, + { + "epoch": 0.2850019554165037, + "loss": 0.06396353244781494, + "loss_ce": 0.004271145910024643, + "loss_iou": 0.169921875, + "loss_num": 0.011962890625, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 282693892, + "step": 2915 + }, + { + "epoch": 0.2850997262416895, + "grad_norm": 30.390966804800584, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 282789884, + "step": 2916 + }, + { + "epoch": 0.2850997262416895, + "loss": 0.12124893069267273, + "loss_ce": 0.005068515427410603, + "loss_iou": 0.3125, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 282789884, + "step": 2916 + }, + { + "epoch": 0.28519749706687525, + "grad_norm": 29.94970139777286, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 282887208, + "step": 2917 + }, + { + "epoch": 0.28519749706687525, + "loss": 0.05930289998650551, + "loss_ce": 0.00298270839266479, + "loss_iou": 0.2021484375, + "loss_num": 0.01123046875, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 282887208, + "step": 2917 + }, + { + "epoch": 0.285295267892061, + "grad_norm": 5.832603992802775, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 282984264, + "step": 2918 + }, + { + "epoch": 0.285295267892061, + "loss": 0.08669030666351318, + "loss_ce": 0.008244874887168407, + "loss_iou": 0.3984375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 282984264, + "step": 2918 + }, + { + "epoch": 0.28539303871724675, + "grad_norm": 7.822829523642919, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 283081692, + "step": 2919 + }, + { + "epoch": 0.28539303871724675, + "loss": 0.07720481604337692, + "loss_ce": 0.005183332599699497, + "loss_iou": 0.322265625, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 283081692, + "step": 2919 + }, + { + "epoch": 0.28549080954243256, + "grad_norm": 3.6272030504319215, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 283177964, + "step": 2920 + }, + { + "epoch": 0.28549080954243256, + "loss": 0.06070586293935776, + "loss_ce": 0.0035845839884132147, + "loss_iou": 0.275390625, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 283177964, + "step": 2920 + }, + { + "epoch": 0.2855885803676183, + "grad_norm": 11.33801845671821, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 283275352, + "step": 2921 + }, + { + "epoch": 0.2855885803676183, + "loss": 0.07446151971817017, + "loss_ce": 0.003233498428016901, + "loss_iou": 0.369140625, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 283275352, + "step": 2921 + }, + { + "epoch": 0.28568635119280406, + "grad_norm": 3.5970454747131257, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 283372884, + "step": 2922 + }, + { + "epoch": 0.28568635119280406, + "loss": 0.06136836111545563, + "loss_ce": 0.006162060424685478, + "loss_iou": 0.265625, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 283372884, + "step": 2922 + }, + { + "epoch": 0.2857841220179898, + "grad_norm": 4.441849739488735, + "learning_rate": 5e-05, + "loss": 0.0872, + "num_input_tokens_seen": 283469908, + "step": 2923 + }, + { + "epoch": 0.2857841220179898, + "loss": 0.08465003222227097, + "loss_ce": 0.010065071284770966, + "loss_iou": 0.30859375, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 283469908, + "step": 2923 + }, + { + "epoch": 0.2858818928431756, + "grad_norm": 9.713275243267796, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 283567240, + "step": 2924 + }, + { + "epoch": 0.2858818928431756, + "loss": 0.0647803544998169, + "loss_ce": 0.001578449155203998, + "loss_iou": 0.287109375, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 283567240, + "step": 2924 + }, + { + "epoch": 0.28597966366836136, + "grad_norm": 18.92847039550703, + "learning_rate": 5e-05, + "loss": 0.1013, + "num_input_tokens_seen": 283663880, + "step": 2925 + }, + { + "epoch": 0.28597966366836136, + "loss": 0.09501159936189651, + "loss_ce": 0.0073136198334395885, + "loss_iou": 0.2734375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 283663880, + "step": 2925 + }, + { + "epoch": 0.2860774344935471, + "grad_norm": 20.928751365647518, + "learning_rate": 5e-05, + "loss": 0.1407, + "num_input_tokens_seen": 283760772, + "step": 2926 + }, + { + "epoch": 0.2860774344935471, + "loss": 0.1900111436843872, + "loss_ce": 0.0039149560034275055, + "loss_iou": 0.328125, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 283760772, + "step": 2926 + }, + { + "epoch": 0.2861752053187329, + "grad_norm": 13.59084122920831, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 283857048, + "step": 2927 + }, + { + "epoch": 0.2861752053187329, + "loss": 0.0815371721982956, + "loss_ce": 0.003885194193571806, + "loss_iou": 0.2333984375, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 283857048, + "step": 2927 + }, + { + "epoch": 0.28627297614391867, + "grad_norm": 18.631587204818032, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 283953248, + "step": 2928 + }, + { + "epoch": 0.28627297614391867, + "loss": 0.04170221462845802, + "loss_ce": 0.004058784805238247, + "loss_iou": 0.412109375, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 283953248, + "step": 2928 + }, + { + "epoch": 0.2863707469691044, + "grad_norm": 25.725284087177965, + "learning_rate": 5e-05, + "loss": 0.1224, + "num_input_tokens_seen": 284050832, + "step": 2929 + }, + { + "epoch": 0.2863707469691044, + "loss": 0.0895235538482666, + "loss_ce": 0.0029299254529178143, + "loss_iou": 0.3671875, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 284050832, + "step": 2929 + }, + { + "epoch": 0.28646851779429017, + "grad_norm": 22.49412380769573, + "learning_rate": 5e-05, + "loss": 0.1454, + "num_input_tokens_seen": 284148220, + "step": 2930 + }, + { + "epoch": 0.28646851779429017, + "loss": 0.15053889155387878, + "loss_ce": 0.004939514212310314, + "loss_iou": 0.32421875, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 284148220, + "step": 2930 + }, + { + "epoch": 0.286566288619476, + "grad_norm": 9.134784187072096, + "learning_rate": 5e-05, + "loss": 0.1118, + "num_input_tokens_seen": 284245580, + "step": 2931 + }, + { + "epoch": 0.286566288619476, + "loss": 0.0705748051404953, + "loss_ce": 0.004839939996600151, + "loss_iou": 0.37109375, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 284245580, + "step": 2931 + }, + { + "epoch": 0.2866640594446617, + "grad_norm": 10.186090042912, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 284343808, + "step": 2932 + }, + { + "epoch": 0.2866640594446617, + "loss": 0.08679386973381042, + "loss_ce": 0.005113567225635052, + "loss_iou": 0.34765625, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 284343808, + "step": 2932 + }, + { + "epoch": 0.2867618302698475, + "grad_norm": 28.736519283924906, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 284441328, + "step": 2933 + }, + { + "epoch": 0.2867618302698475, + "loss": 0.08361822366714478, + "loss_ce": 0.00442510424181819, + "loss_iou": 0.388671875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 284441328, + "step": 2933 + }, + { + "epoch": 0.2868596010950332, + "grad_norm": 19.5587462419593, + "learning_rate": 5e-05, + "loss": 0.1068, + "num_input_tokens_seen": 284537700, + "step": 2934 + }, + { + "epoch": 0.2868596010950332, + "loss": 0.14148104190826416, + "loss_ce": 0.008561724796891212, + "loss_iou": 0.27734375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 284537700, + "step": 2934 + }, + { + "epoch": 0.28695737192021903, + "grad_norm": 12.460876710314183, + "learning_rate": 5e-05, + "loss": 0.0968, + "num_input_tokens_seen": 284633568, + "step": 2935 + }, + { + "epoch": 0.28695737192021903, + "loss": 0.0881640613079071, + "loss_ce": 0.004942623432725668, + "loss_iou": 0.1435546875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 284633568, + "step": 2935 + }, + { + "epoch": 0.2870551427454048, + "grad_norm": 12.588902207968573, + "learning_rate": 5e-05, + "loss": 0.0928, + "num_input_tokens_seen": 284730296, + "step": 2936 + }, + { + "epoch": 0.2870551427454048, + "loss": 0.08078721165657043, + "loss_ce": 0.004096540622413158, + "loss_iou": 0.3203125, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 284730296, + "step": 2936 + }, + { + "epoch": 0.28715291357059053, + "grad_norm": 10.426151849020771, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 284826796, + "step": 2937 + }, + { + "epoch": 0.28715291357059053, + "loss": 0.09663325548171997, + "loss_ce": 0.008864698931574821, + "loss_iou": 0.265625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 284826796, + "step": 2937 + }, + { + "epoch": 0.2872506843957763, + "grad_norm": 16.49248159606416, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 284924072, + "step": 2938 + }, + { + "epoch": 0.2872506843957763, + "loss": 0.10371525585651398, + "loss_ce": 0.007371261715888977, + "loss_iou": 0.3359375, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 284924072, + "step": 2938 + }, + { + "epoch": 0.2873484552209621, + "grad_norm": 20.05518190330537, + "learning_rate": 5e-05, + "loss": 0.1309, + "num_input_tokens_seen": 285021228, + "step": 2939 + }, + { + "epoch": 0.2873484552209621, + "loss": 0.13435077667236328, + "loss_ce": 0.008450495079159737, + "loss_iou": 0.2265625, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 285021228, + "step": 2939 + }, + { + "epoch": 0.28744622604614783, + "grad_norm": 4.236824663785765, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 285117548, + "step": 2940 + }, + { + "epoch": 0.28744622604614783, + "loss": 0.09021838754415512, + "loss_ce": 0.004708138294517994, + "loss_iou": 0.296875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 285117548, + "step": 2940 + }, + { + "epoch": 0.2875439968713336, + "grad_norm": 6.466774781968798, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 285213680, + "step": 2941 + }, + { + "epoch": 0.2875439968713336, + "loss": 0.095126211643219, + "loss_ce": 0.00986009743064642, + "loss_iou": 0.30859375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 285213680, + "step": 2941 + }, + { + "epoch": 0.28764176769651933, + "grad_norm": 8.477587720992341, + "learning_rate": 5e-05, + "loss": 0.1274, + "num_input_tokens_seen": 285310324, + "step": 2942 + }, + { + "epoch": 0.28764176769651933, + "loss": 0.16087394952774048, + "loss_ce": 0.003746566129848361, + "loss_iou": 0.2412109375, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 285310324, + "step": 2942 + }, + { + "epoch": 0.28773953852170514, + "grad_norm": 14.50463185330346, + "learning_rate": 5e-05, + "loss": 0.1045, + "num_input_tokens_seen": 285407304, + "step": 2943 + }, + { + "epoch": 0.28773953852170514, + "loss": 0.146215558052063, + "loss_ce": 0.004034171346575022, + "loss_iou": 0.2470703125, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 285407304, + "step": 2943 + }, + { + "epoch": 0.2878373093468909, + "grad_norm": 12.034439112091745, + "learning_rate": 5e-05, + "loss": 0.1255, + "num_input_tokens_seen": 285504160, + "step": 2944 + }, + { + "epoch": 0.2878373093468909, + "loss": 0.1476200520992279, + "loss_ce": 0.007918210700154305, + "loss_iou": 0.283203125, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 285504160, + "step": 2944 + }, + { + "epoch": 0.28793508017207664, + "grad_norm": 13.388451475855145, + "learning_rate": 5e-05, + "loss": 0.0946, + "num_input_tokens_seen": 285601276, + "step": 2945 + }, + { + "epoch": 0.28793508017207664, + "loss": 0.12088406085968018, + "loss_ce": 0.007724884431809187, + "loss_iou": 0.23828125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 285601276, + "step": 2945 + }, + { + "epoch": 0.2880328509972624, + "grad_norm": 10.023747876099483, + "learning_rate": 5e-05, + "loss": 0.0552, + "num_input_tokens_seen": 285698384, + "step": 2946 + }, + { + "epoch": 0.2880328509972624, + "loss": 0.0459895133972168, + "loss_ce": 0.005279063247144222, + "loss_iou": 0.328125, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 285698384, + "step": 2946 + }, + { + "epoch": 0.2881306218224482, + "grad_norm": 4.130291686659757, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 285795044, + "step": 2947 + }, + { + "epoch": 0.2881306218224482, + "loss": 0.10974570363759995, + "loss_ce": 0.007679657079279423, + "loss_iou": 0.25, + "loss_num": 0.0205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 285795044, + "step": 2947 + }, + { + "epoch": 0.28822839264763395, + "grad_norm": 7.197227005077431, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 285891636, + "step": 2948 + }, + { + "epoch": 0.28822839264763395, + "loss": 0.07987599074840546, + "loss_ce": 0.005191848147660494, + "loss_iou": 0.28515625, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 285891636, + "step": 2948 + }, + { + "epoch": 0.2883261634728197, + "grad_norm": 10.610699903607102, + "learning_rate": 5e-05, + "loss": 0.0321, + "num_input_tokens_seen": 285988004, + "step": 2949 + }, + { + "epoch": 0.2883261634728197, + "loss": 0.03132497891783714, + "loss_ce": 0.0028749671764671803, + "loss_iou": 0.25390625, + "loss_num": 0.00567626953125, + "loss_xval": 0.0284423828125, + "num_input_tokens_seen": 285988004, + "step": 2949 + }, + { + "epoch": 0.2884239342980055, + "grad_norm": 13.520842674373052, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 286084748, + "step": 2950 + }, + { + "epoch": 0.2884239342980055, + "loss": 0.08337000012397766, + "loss_ce": 0.0031087705865502357, + "loss_iou": 0.263671875, + "loss_num": 0.01611328125, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 286084748, + "step": 2950 + }, + { + "epoch": 0.28852170512319125, + "grad_norm": 15.89108240376469, + "learning_rate": 5e-05, + "loss": 0.0621, + "num_input_tokens_seen": 286182208, + "step": 2951 + }, + { + "epoch": 0.28852170512319125, + "loss": 0.0602261908352375, + "loss_ce": 0.002906547859311104, + "loss_iou": 0.25390625, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 286182208, + "step": 2951 + }, + { + "epoch": 0.288619475948377, + "grad_norm": 18.099100021451516, + "learning_rate": 5e-05, + "loss": 0.0937, + "num_input_tokens_seen": 286279016, + "step": 2952 + }, + { + "epoch": 0.288619475948377, + "loss": 0.07330058515071869, + "loss_ce": 0.0031330466736108065, + "loss_iou": 0.306640625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 286279016, + "step": 2952 + }, + { + "epoch": 0.28871724677356275, + "grad_norm": 22.21903679864396, + "learning_rate": 5e-05, + "loss": 0.0588, + "num_input_tokens_seen": 286375440, + "step": 2953 + }, + { + "epoch": 0.28871724677356275, + "loss": 0.06302519142627716, + "loss_ce": 0.0012957565486431122, + "loss_iou": 0.30078125, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 286375440, + "step": 2953 + }, + { + "epoch": 0.28881501759874856, + "grad_norm": 16.941503297342887, + "learning_rate": 5e-05, + "loss": 0.1274, + "num_input_tokens_seen": 286471788, + "step": 2954 + }, + { + "epoch": 0.28881501759874856, + "loss": 0.11202295124530792, + "loss_ce": 0.003563476260751486, + "loss_iou": 0.306640625, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 286471788, + "step": 2954 + }, + { + "epoch": 0.2889127884239343, + "grad_norm": 17.147621397540263, + "learning_rate": 5e-05, + "loss": 0.1322, + "num_input_tokens_seen": 286568104, + "step": 2955 + }, + { + "epoch": 0.2889127884239343, + "loss": 0.1452564299106598, + "loss_ce": 0.006126781925559044, + "loss_iou": 0.251953125, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 286568104, + "step": 2955 + }, + { + "epoch": 0.28901055924912006, + "grad_norm": 22.346874766651617, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 286665340, + "step": 2956 + }, + { + "epoch": 0.28901055924912006, + "loss": 0.09683408588171005, + "loss_ce": 0.004732031840831041, + "loss_iou": 0.357421875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 286665340, + "step": 2956 + }, + { + "epoch": 0.2891083300743058, + "grad_norm": 10.90890633885773, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 286763036, + "step": 2957 + }, + { + "epoch": 0.2891083300743058, + "loss": 0.08429118990898132, + "loss_ce": 0.0050065298564732075, + "loss_iou": 0.357421875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 286763036, + "step": 2957 + }, + { + "epoch": 0.2892061008994916, + "grad_norm": 18.608295305879867, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 286859904, + "step": 2958 + }, + { + "epoch": 0.2892061008994916, + "loss": 0.11954689025878906, + "loss_ce": 0.0037631955929100513, + "loss_iou": 0.345703125, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 286859904, + "step": 2958 + }, + { + "epoch": 0.28930387172467736, + "grad_norm": 25.558174138941265, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 286956216, + "step": 2959 + }, + { + "epoch": 0.28930387172467736, + "loss": 0.07993466407060623, + "loss_ce": 0.0032745078206062317, + "loss_iou": 0.2890625, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 286956216, + "step": 2959 + }, + { + "epoch": 0.2894016425498631, + "grad_norm": 7.092935923939709, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 287053056, + "step": 2960 + }, + { + "epoch": 0.2894016425498631, + "loss": 0.09155640006065369, + "loss_ce": 0.008365479297935963, + "loss_iou": 0.328125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 287053056, + "step": 2960 + }, + { + "epoch": 0.28949941337504886, + "grad_norm": 4.527567761915511, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 287150144, + "step": 2961 + }, + { + "epoch": 0.28949941337504886, + "loss": 0.10354883968830109, + "loss_ce": 0.007731274701654911, + "loss_iou": 0.2333984375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 287150144, + "step": 2961 + }, + { + "epoch": 0.28959718420023467, + "grad_norm": 8.0109142988706, + "learning_rate": 5e-05, + "loss": 0.0977, + "num_input_tokens_seen": 287246952, + "step": 2962 + }, + { + "epoch": 0.28959718420023467, + "loss": 0.0943591445684433, + "loss_ce": 0.00591920455917716, + "loss_iou": 0.333984375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 287246952, + "step": 2962 + }, + { + "epoch": 0.2896949550254204, + "grad_norm": 4.8045399510164, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 287343812, + "step": 2963 + }, + { + "epoch": 0.2896949550254204, + "loss": 0.07044193148612976, + "loss_ce": 0.004554477520287037, + "loss_iou": 0.345703125, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 287343812, + "step": 2963 + }, + { + "epoch": 0.28979272585060617, + "grad_norm": 21.570981248520287, + "learning_rate": 5e-05, + "loss": 0.0726, + "num_input_tokens_seen": 287440800, + "step": 2964 + }, + { + "epoch": 0.28979272585060617, + "loss": 0.08796663582324982, + "loss_ce": 0.00891084410250187, + "loss_iou": 0.376953125, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 287440800, + "step": 2964 + }, + { + "epoch": 0.2898904966757919, + "grad_norm": 35.21627176877912, + "learning_rate": 5e-05, + "loss": 0.0976, + "num_input_tokens_seen": 287537888, + "step": 2965 + }, + { + "epoch": 0.2898904966757919, + "loss": 0.08457474410533905, + "loss_ce": 0.004099890124052763, + "loss_iou": 0.390625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 287537888, + "step": 2965 + }, + { + "epoch": 0.2899882675009777, + "grad_norm": 13.97832319558447, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 287634208, + "step": 2966 + }, + { + "epoch": 0.2899882675009777, + "loss": 0.09822142124176025, + "loss_ce": 0.004578231833875179, + "loss_iou": 0.25, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 287634208, + "step": 2966 + }, + { + "epoch": 0.2900860383261635, + "grad_norm": 2.6276619022683643, + "learning_rate": 5e-05, + "loss": 0.1003, + "num_input_tokens_seen": 287731876, + "step": 2967 + }, + { + "epoch": 0.2900860383261635, + "loss": 0.13934481143951416, + "loss_ce": 0.008439654484391212, + "loss_iou": 0.376953125, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 287731876, + "step": 2967 + }, + { + "epoch": 0.2901838091513492, + "grad_norm": 15.349401527175981, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 287828400, + "step": 2968 + }, + { + "epoch": 0.2901838091513492, + "loss": 0.08111179620027542, + "loss_ce": 0.0025900648906826973, + "loss_iou": 0.2412109375, + "loss_num": 0.015625, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 287828400, + "step": 2968 + }, + { + "epoch": 0.290281579976535, + "grad_norm": 6.25927233994957, + "learning_rate": 5e-05, + "loss": 0.1041, + "num_input_tokens_seen": 287926076, + "step": 2969 + }, + { + "epoch": 0.290281579976535, + "loss": 0.12453983724117279, + "loss_ce": 0.00469730980694294, + "loss_iou": 0.39453125, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 287926076, + "step": 2969 + }, + { + "epoch": 0.2903793508017208, + "grad_norm": 9.806979968059302, + "learning_rate": 5e-05, + "loss": 0.0996, + "num_input_tokens_seen": 288022908, + "step": 2970 + }, + { + "epoch": 0.2903793508017208, + "loss": 0.12644875049591064, + "loss_ce": 0.008879782631993294, + "loss_iou": 0.390625, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 288022908, + "step": 2970 + }, + { + "epoch": 0.29047712162690653, + "grad_norm": 8.273271232445607, + "learning_rate": 5e-05, + "loss": 0.1579, + "num_input_tokens_seen": 288119748, + "step": 2971 + }, + { + "epoch": 0.29047712162690653, + "loss": 0.14552360773086548, + "loss_ce": 0.007187420502305031, + "loss_iou": 0.373046875, + "loss_num": 0.027587890625, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 288119748, + "step": 2971 + }, + { + "epoch": 0.2905748924520923, + "grad_norm": 9.549225998661111, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 288216004, + "step": 2972 + }, + { + "epoch": 0.2905748924520923, + "loss": 0.04960041120648384, + "loss_ce": 0.00487690232694149, + "loss_iou": 0.2392578125, + "loss_num": 0.00897216796875, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 288216004, + "step": 2972 + }, + { + "epoch": 0.2906726632772781, + "grad_norm": 5.094841914346307, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 288312060, + "step": 2973 + }, + { + "epoch": 0.2906726632772781, + "loss": 0.08280328661203384, + "loss_ce": 0.007669004611670971, + "loss_iou": 0.3125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 288312060, + "step": 2973 + }, + { + "epoch": 0.29077043410246384, + "grad_norm": 6.40415683421311, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 288409516, + "step": 2974 + }, + { + "epoch": 0.29077043410246384, + "loss": 0.0880962461233139, + "loss_ce": 0.0036312141455709934, + "loss_iou": 0.33984375, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 288409516, + "step": 2974 + }, + { + "epoch": 0.2908682049276496, + "grad_norm": 8.088719472858314, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 288506220, + "step": 2975 + }, + { + "epoch": 0.2908682049276496, + "loss": 0.06629671901464462, + "loss_ce": 0.0024997196160256863, + "loss_iou": 0.318359375, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 288506220, + "step": 2975 + }, + { + "epoch": 0.29096597575283534, + "grad_norm": 2.5856791183176004, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 288603704, + "step": 2976 + }, + { + "epoch": 0.29096597575283534, + "loss": 0.08787461370229721, + "loss_ce": 0.010192116722464561, + "loss_iou": 0.248046875, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 288603704, + "step": 2976 + }, + { + "epoch": 0.29106374657802114, + "grad_norm": 5.517134575202912, + "learning_rate": 5e-05, + "loss": 0.0935, + "num_input_tokens_seen": 288701016, + "step": 2977 + }, + { + "epoch": 0.29106374657802114, + "loss": 0.07190260291099548, + "loss_ce": 0.006003709509968758, + "loss_iou": 0.208984375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 288701016, + "step": 2977 + }, + { + "epoch": 0.2911615174032069, + "grad_norm": 14.142946653875534, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 288797872, + "step": 2978 + }, + { + "epoch": 0.2911615174032069, + "loss": 0.09002050012350082, + "loss_ce": 0.003777822246775031, + "loss_iou": 0.353515625, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 288797872, + "step": 2978 + }, + { + "epoch": 0.29125928822839264, + "grad_norm": 10.325960772122636, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 288894716, + "step": 2979 + }, + { + "epoch": 0.29125928822839264, + "loss": 0.06456051766872406, + "loss_ce": 0.007660491857677698, + "loss_iou": 0.25, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 288894716, + "step": 2979 + }, + { + "epoch": 0.2913570590535784, + "grad_norm": 8.14376881309434, + "learning_rate": 5e-05, + "loss": 0.1052, + "num_input_tokens_seen": 288991628, + "step": 2980 + }, + { + "epoch": 0.2913570590535784, + "loss": 0.09955944120883942, + "loss_ce": 0.01023831032216549, + "loss_iou": 0.3671875, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 288991628, + "step": 2980 + }, + { + "epoch": 0.2914548298787642, + "grad_norm": 11.163153200450036, + "learning_rate": 5e-05, + "loss": 0.0697, + "num_input_tokens_seen": 289089524, + "step": 2981 + }, + { + "epoch": 0.2914548298787642, + "loss": 0.0883253663778305, + "loss_ce": 0.00832735002040863, + "loss_iou": 0.283203125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 289089524, + "step": 2981 + }, + { + "epoch": 0.29155260070394995, + "grad_norm": 10.472669380254334, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 289187136, + "step": 2982 + }, + { + "epoch": 0.29155260070394995, + "loss": 0.09818844497203827, + "loss_ce": 0.0048428066074848175, + "loss_iou": 0.365234375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 289187136, + "step": 2982 + }, + { + "epoch": 0.2916503715291357, + "grad_norm": 19.36646610172788, + "learning_rate": 5e-05, + "loss": 0.0921, + "num_input_tokens_seen": 289284692, + "step": 2983 + }, + { + "epoch": 0.2916503715291357, + "loss": 0.10523207485675812, + "loss_ce": 0.006294081918895245, + "loss_iou": 0.40625, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 289284692, + "step": 2983 + }, + { + "epoch": 0.29174814235432145, + "grad_norm": 22.872887770753156, + "learning_rate": 5e-05, + "loss": 0.1029, + "num_input_tokens_seen": 289381348, + "step": 2984 + }, + { + "epoch": 0.29174814235432145, + "loss": 0.0625704824924469, + "loss_ce": 0.0035037067718803883, + "loss_iou": 0.3671875, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 289381348, + "step": 2984 + }, + { + "epoch": 0.29184591317950725, + "grad_norm": 5.346269440932668, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 289478412, + "step": 2985 + }, + { + "epoch": 0.29184591317950725, + "loss": 0.07744103670120239, + "loss_ce": 0.00618249224498868, + "loss_iou": 0.263671875, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 289478412, + "step": 2985 + }, + { + "epoch": 0.291943684004693, + "grad_norm": 13.277644226629889, + "learning_rate": 5e-05, + "loss": 0.1383, + "num_input_tokens_seen": 289574716, + "step": 2986 + }, + { + "epoch": 0.291943684004693, + "loss": 0.11937949806451797, + "loss_ce": 0.005114051979035139, + "loss_iou": 0.3046875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 289574716, + "step": 2986 + }, + { + "epoch": 0.29204145482987875, + "grad_norm": 10.17316545647391, + "learning_rate": 5e-05, + "loss": 0.1003, + "num_input_tokens_seen": 289671168, + "step": 2987 + }, + { + "epoch": 0.29204145482987875, + "loss": 0.13040462136268616, + "loss_ce": 0.010409492999315262, + "loss_iou": 0.380859375, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 289671168, + "step": 2987 + }, + { + "epoch": 0.2921392256550645, + "grad_norm": 12.809455418884452, + "learning_rate": 5e-05, + "loss": 0.1117, + "num_input_tokens_seen": 289769020, + "step": 2988 + }, + { + "epoch": 0.2921392256550645, + "loss": 0.11403307318687439, + "loss_ce": 0.0034984122030436993, + "loss_iou": 0.31640625, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 289769020, + "step": 2988 + }, + { + "epoch": 0.2922369964802503, + "grad_norm": 21.99831000414943, + "learning_rate": 5e-05, + "loss": 0.1066, + "num_input_tokens_seen": 289866176, + "step": 2989 + }, + { + "epoch": 0.2922369964802503, + "loss": 0.08037339895963669, + "loss_ce": 0.003804799634963274, + "loss_iou": 0.466796875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 289866176, + "step": 2989 + }, + { + "epoch": 0.29233476730543606, + "grad_norm": 19.3970788884752, + "learning_rate": 5e-05, + "loss": 0.1171, + "num_input_tokens_seen": 289962636, + "step": 2990 + }, + { + "epoch": 0.29233476730543606, + "loss": 0.11759808659553528, + "loss_ce": 0.004148990381509066, + "loss_iou": 0.30859375, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 289962636, + "step": 2990 + }, + { + "epoch": 0.2924325381306218, + "grad_norm": 8.630934665480558, + "learning_rate": 5e-05, + "loss": 0.097, + "num_input_tokens_seen": 290059832, + "step": 2991 + }, + { + "epoch": 0.2924325381306218, + "loss": 0.04364430904388428, + "loss_ce": 0.0037578337360173464, + "loss_iou": 0.388671875, + "loss_num": 0.00799560546875, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 290059832, + "step": 2991 + }, + { + "epoch": 0.29253030895580756, + "grad_norm": 13.491985645024403, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 290157472, + "step": 2992 + }, + { + "epoch": 0.29253030895580756, + "loss": 0.07455100119113922, + "loss_ce": 0.006710425019264221, + "loss_iou": 0.45703125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 290157472, + "step": 2992 + }, + { + "epoch": 0.29262807978099337, + "grad_norm": 11.83990474058107, + "learning_rate": 5e-05, + "loss": 0.0945, + "num_input_tokens_seen": 290254652, + "step": 2993 + }, + { + "epoch": 0.29262807978099337, + "loss": 0.09451642632484436, + "loss_ce": 0.00715986080467701, + "loss_iou": 0.35546875, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 290254652, + "step": 2993 + }, + { + "epoch": 0.2927258506061791, + "grad_norm": 8.221338050349594, + "learning_rate": 5e-05, + "loss": 0.1291, + "num_input_tokens_seen": 290351504, + "step": 2994 + }, + { + "epoch": 0.2927258506061791, + "loss": 0.10353372991085052, + "loss_ce": 0.006457314360886812, + "loss_iou": 0.3359375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 290351504, + "step": 2994 + }, + { + "epoch": 0.29282362143136487, + "grad_norm": 39.31871595293714, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 290449268, + "step": 2995 + }, + { + "epoch": 0.29282362143136487, + "loss": 0.09018002450466156, + "loss_ce": 0.006989112123847008, + "loss_iou": 0.33984375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 290449268, + "step": 2995 + }, + { + "epoch": 0.2929213922565507, + "grad_norm": 4.182032271962174, + "learning_rate": 5e-05, + "loss": 0.0708, + "num_input_tokens_seen": 290546224, + "step": 2996 + }, + { + "epoch": 0.2929213922565507, + "loss": 0.08401721715927124, + "loss_ce": 0.004869883880019188, + "loss_iou": 0.291015625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 290546224, + "step": 2996 + }, + { + "epoch": 0.2930191630817364, + "grad_norm": 5.807786286330589, + "learning_rate": 5e-05, + "loss": 0.1375, + "num_input_tokens_seen": 290642272, + "step": 2997 + }, + { + "epoch": 0.2930191630817364, + "loss": 0.12671932578086853, + "loss_ce": 0.008719304576516151, + "loss_iou": 0.2177734375, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 290642272, + "step": 2997 + }, + { + "epoch": 0.2931169339069222, + "grad_norm": 3.3680259981057907, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 290738940, + "step": 2998 + }, + { + "epoch": 0.2931169339069222, + "loss": 0.09025080502033234, + "loss_ce": 0.002238105982542038, + "loss_iou": 0.328125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 290738940, + "step": 2998 + }, + { + "epoch": 0.2932147047321079, + "grad_norm": 14.676787047721891, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 290835556, + "step": 2999 + }, + { + "epoch": 0.2932147047321079, + "loss": 0.09694012999534607, + "loss_ce": 0.01115521788597107, + "loss_iou": 0.349609375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 290835556, + "step": 2999 + }, + { + "epoch": 0.29331247555729373, + "grad_norm": 16.038532586149483, + "learning_rate": 5e-05, + "loss": 0.1193, + "num_input_tokens_seen": 290932672, + "step": 3000 + }, + { + "epoch": 0.29331247555729373, + "eval_seeclick_CIoU": 0.4344654083251953, + "eval_seeclick_GIoU": 0.4305609464645386, + "eval_seeclick_IoU": 0.4880209118127823, + "eval_seeclick_MAE_all": 0.08334933966398239, + "eval_seeclick_MAE_h": 0.03739124909043312, + "eval_seeclick_MAE_w": 0.11913871020078659, + "eval_seeclick_MAE_x": 0.1399688944220543, + "eval_seeclick_MAE_y": 0.03689852450042963, + "eval_seeclick_NUM_probability": 0.9999872446060181, + "eval_seeclick_inside_bbox": 0.7982954680919647, + "eval_seeclick_loss": 0.3014046847820282, + "eval_seeclick_loss_ce": 0.010054897516965866, + "eval_seeclick_loss_iou": 0.4788818359375, + "eval_seeclick_loss_num": 0.06056976318359375, + "eval_seeclick_loss_xval": 0.303009033203125, + "eval_seeclick_runtime": 72.9882, + "eval_seeclick_samples_per_second": 0.589, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 290932672, + "step": 3000 + }, + { + "epoch": 0.29331247555729373, + "eval_icons_CIoU": 0.6426614224910736, + "eval_icons_GIoU": 0.6302827596664429, + "eval_icons_IoU": 0.6796404719352722, + "eval_icons_MAE_all": 0.0697957668453455, + "eval_icons_MAE_h": 0.07489877566695213, + "eval_icons_MAE_w": 0.06663131527602673, + "eval_icons_MAE_x": 0.06524747982621193, + "eval_icons_MAE_y": 0.07240549474954605, + "eval_icons_NUM_probability": 0.999985545873642, + "eval_icons_inside_bbox": 0.7638888955116272, + "eval_icons_loss": 0.2139032781124115, + "eval_icons_loss_ce": 0.00013248931645648554, + "eval_icons_loss_iou": 0.38580322265625, + "eval_icons_loss_num": 0.047206878662109375, + "eval_icons_loss_xval": 0.2361297607421875, + "eval_icons_runtime": 84.6375, + "eval_icons_samples_per_second": 0.591, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 290932672, + "step": 3000 + }, + { + "epoch": 0.29331247555729373, + "eval_screenspot_CIoU": 0.2899338901042938, + "eval_screenspot_GIoU": 0.2683684130509694, + "eval_screenspot_IoU": 0.3751356105009715, + "eval_screenspot_MAE_all": 0.15593479573726654, + "eval_screenspot_MAE_h": 0.11565586179494858, + "eval_screenspot_MAE_w": 0.20626923193534216, + "eval_screenspot_MAE_x": 0.1854534794886907, + "eval_screenspot_MAE_y": 0.11636061718066533, + "eval_screenspot_NUM_probability": 0.9999725023905436, + "eval_screenspot_inside_bbox": 0.606250007947286, + "eval_screenspot_loss": 0.5674017071723938, + "eval_screenspot_loss_ce": 0.01855529161791007, + "eval_screenspot_loss_iou": 0.3348388671875, + "eval_screenspot_loss_num": 0.11060078938802083, + "eval_screenspot_loss_xval": 0.5530598958333334, + "eval_screenspot_runtime": 160.9769, + "eval_screenspot_samples_per_second": 0.553, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 290932672, + "step": 3000 + }, + { + "epoch": 0.29331247555729373, + "eval_compot_CIoU": 0.4296267628669739, + "eval_compot_GIoU": 0.41331177949905396, + "eval_compot_IoU": 0.5001751780509949, + "eval_compot_MAE_all": 0.09852513298392296, + "eval_compot_MAE_h": 0.08171522617340088, + "eval_compot_MAE_w": 0.11632245033979416, + "eval_compot_MAE_x": 0.11341175809502602, + "eval_compot_MAE_y": 0.08265109732747078, + "eval_compot_NUM_probability": 0.9999746084213257, + "eval_compot_inside_bbox": 0.6614583432674408, + "eval_compot_loss": 0.3151237964630127, + "eval_compot_loss_ce": 0.01693264301866293, + "eval_compot_loss_iou": 0.45928955078125, + "eval_compot_loss_num": 0.052776336669921875, + "eval_compot_loss_xval": 0.26397705078125, + "eval_compot_runtime": 86.9128, + "eval_compot_samples_per_second": 0.575, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 290932672, + "step": 3000 + }, + { + "epoch": 0.29331247555729373, + "loss": 0.24520838260650635, + "loss_ce": 0.016631729900836945, + "loss_iou": 0.53515625, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 290932672, + "step": 3000 + }, + { + "epoch": 0.2934102463824795, + "grad_norm": 12.842651883922581, + "learning_rate": 5e-05, + "loss": 0.0675, + "num_input_tokens_seen": 291030188, + "step": 3001 + }, + { + "epoch": 0.2934102463824795, + "loss": 0.07844547182321548, + "loss_ce": 0.00996402744203806, + "loss_iou": 0.35546875, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 291030188, + "step": 3001 + }, + { + "epoch": 0.29350801720766523, + "grad_norm": 14.175311767918036, + "learning_rate": 5e-05, + "loss": 0.1048, + "num_input_tokens_seen": 291127976, + "step": 3002 + }, + { + "epoch": 0.29350801720766523, + "loss": 0.12193252146244049, + "loss_ce": 0.007232195697724819, + "loss_iou": 0.291015625, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 291127976, + "step": 3002 + }, + { + "epoch": 0.293605788032851, + "grad_norm": 7.421998484608873, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 291225520, + "step": 3003 + }, + { + "epoch": 0.293605788032851, + "loss": 0.09145255386829376, + "loss_ce": 0.0033025331795215607, + "loss_iou": 0.28125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 291225520, + "step": 3003 + }, + { + "epoch": 0.2937035588580368, + "grad_norm": 3.957082353062329, + "learning_rate": 5e-05, + "loss": 0.0979, + "num_input_tokens_seen": 291322420, + "step": 3004 + }, + { + "epoch": 0.2937035588580368, + "loss": 0.12241888046264648, + "loss_ce": 0.0041327523067593575, + "loss_iou": 0.283203125, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 291322420, + "step": 3004 + }, + { + "epoch": 0.29380132968322253, + "grad_norm": 3.3683581540728516, + "learning_rate": 5e-05, + "loss": 0.0971, + "num_input_tokens_seen": 291420084, + "step": 3005 + }, + { + "epoch": 0.29380132968322253, + "loss": 0.09789176285266876, + "loss_ce": 0.0025548539124429226, + "loss_iou": 0.28125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 291420084, + "step": 3005 + }, + { + "epoch": 0.2938991005084083, + "grad_norm": 7.230496864367292, + "learning_rate": 5e-05, + "loss": 0.0959, + "num_input_tokens_seen": 291516228, + "step": 3006 + }, + { + "epoch": 0.2938991005084083, + "loss": 0.10693426430225372, + "loss_ce": 0.006394105963408947, + "loss_iou": 0.353515625, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 291516228, + "step": 3006 + }, + { + "epoch": 0.29399687133359403, + "grad_norm": 29.471463757363225, + "learning_rate": 5e-05, + "loss": 0.078, + "num_input_tokens_seen": 291613480, + "step": 3007 + }, + { + "epoch": 0.29399687133359403, + "loss": 0.09250938892364502, + "loss_ce": 0.006648179143667221, + "loss_iou": 0.3125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 291613480, + "step": 3007 + }, + { + "epoch": 0.29409464215877984, + "grad_norm": 5.0392794664677085, + "learning_rate": 5e-05, + "loss": 0.1053, + "num_input_tokens_seen": 291710384, + "step": 3008 + }, + { + "epoch": 0.29409464215877984, + "loss": 0.09318957477807999, + "loss_ce": 0.00630602752789855, + "loss_iou": 0.478515625, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 291710384, + "step": 3008 + }, + { + "epoch": 0.2941924129839656, + "grad_norm": 2.9685754526459904, + "learning_rate": 5e-05, + "loss": 0.0643, + "num_input_tokens_seen": 291806656, + "step": 3009 + }, + { + "epoch": 0.2941924129839656, + "loss": 0.0741468146443367, + "loss_ce": 0.003254485782235861, + "loss_iou": 0.365234375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 291806656, + "step": 3009 + }, + { + "epoch": 0.29429018380915134, + "grad_norm": 7.824602453841099, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 291904520, + "step": 3010 + }, + { + "epoch": 0.29429018380915134, + "loss": 0.06199987977743149, + "loss_ce": 0.0033298360649496317, + "loss_iou": 0.34765625, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 291904520, + "step": 3010 + }, + { + "epoch": 0.2943879546343371, + "grad_norm": 6.733507762166484, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 292000876, + "step": 3011 + }, + { + "epoch": 0.2943879546343371, + "loss": 0.09326212108135223, + "loss_ce": 0.0027698734775185585, + "loss_iou": 0.365234375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 292000876, + "step": 3011 + }, + { + "epoch": 0.2944857254595229, + "grad_norm": 4.557364232132908, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 292097448, + "step": 3012 + }, + { + "epoch": 0.2944857254595229, + "loss": 0.0559757798910141, + "loss_ce": 0.010733472183346748, + "loss_iou": 0.296875, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 292097448, + "step": 3012 + }, + { + "epoch": 0.29458349628470865, + "grad_norm": 4.295448886847341, + "learning_rate": 5e-05, + "loss": 0.1173, + "num_input_tokens_seen": 292195100, + "step": 3013 + }, + { + "epoch": 0.29458349628470865, + "loss": 0.12634629011154175, + "loss_ce": 0.005984948482364416, + "loss_iou": 0.28125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 292195100, + "step": 3013 + }, + { + "epoch": 0.2946812671098944, + "grad_norm": 5.514527510645912, + "learning_rate": 5e-05, + "loss": 0.0873, + "num_input_tokens_seen": 292292800, + "step": 3014 + }, + { + "epoch": 0.2946812671098944, + "loss": 0.11193761229515076, + "loss_ce": 0.0030814004130661488, + "loss_iou": 0.3515625, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 292292800, + "step": 3014 + }, + { + "epoch": 0.29477903793508015, + "grad_norm": 4.982624441003404, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 292390016, + "step": 3015 + }, + { + "epoch": 0.29477903793508015, + "loss": 0.11157278716564178, + "loss_ce": 0.004242467228323221, + "loss_iou": 0.40234375, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 292390016, + "step": 3015 + }, + { + "epoch": 0.29487680876026595, + "grad_norm": 10.130136834964356, + "learning_rate": 5e-05, + "loss": 0.0998, + "num_input_tokens_seen": 292487320, + "step": 3016 + }, + { + "epoch": 0.29487680876026595, + "loss": 0.08019740879535675, + "loss_ce": 0.0035754041746258736, + "loss_iou": 0.330078125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 292487320, + "step": 3016 + }, + { + "epoch": 0.2949745795854517, + "grad_norm": 14.93219502281555, + "learning_rate": 5e-05, + "loss": 0.0819, + "num_input_tokens_seen": 292583900, + "step": 3017 + }, + { + "epoch": 0.2949745795854517, + "loss": 0.06716319173574448, + "loss_ce": 0.005227765999734402, + "loss_iou": 0.38671875, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 292583900, + "step": 3017 + }, + { + "epoch": 0.29507235041063745, + "grad_norm": 3.299130885466467, + "learning_rate": 5e-05, + "loss": 0.0875, + "num_input_tokens_seen": 292680720, + "step": 3018 + }, + { + "epoch": 0.29507235041063745, + "loss": 0.11372419446706772, + "loss_ce": 0.011154609732329845, + "loss_iou": 0.267578125, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 292680720, + "step": 3018 + }, + { + "epoch": 0.29517012123582326, + "grad_norm": 7.7845937758889265, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 292778212, + "step": 3019 + }, + { + "epoch": 0.29517012123582326, + "loss": 0.09606379270553589, + "loss_ce": 0.00889032706618309, + "loss_iou": 0.435546875, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 292778212, + "step": 3019 + }, + { + "epoch": 0.295267892061009, + "grad_norm": 10.306479207805186, + "learning_rate": 5e-05, + "loss": 0.0976, + "num_input_tokens_seen": 292875180, + "step": 3020 + }, + { + "epoch": 0.295267892061009, + "loss": 0.12631455063819885, + "loss_ce": 0.00436630891636014, + "loss_iou": 0.2353515625, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 292875180, + "step": 3020 + }, + { + "epoch": 0.29536566288619476, + "grad_norm": 4.9816712655327136, + "learning_rate": 5e-05, + "loss": 0.1034, + "num_input_tokens_seen": 292972372, + "step": 3021 + }, + { + "epoch": 0.29536566288619476, + "loss": 0.10062123090028763, + "loss_ce": 0.0049638813361525536, + "loss_iou": 0.33984375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 292972372, + "step": 3021 + }, + { + "epoch": 0.2954634337113805, + "grad_norm": 10.24674130508847, + "learning_rate": 5e-05, + "loss": 0.0631, + "num_input_tokens_seen": 293069828, + "step": 3022 + }, + { + "epoch": 0.2954634337113805, + "loss": 0.08263334631919861, + "loss_ce": 0.01086363010108471, + "loss_iou": 0.322265625, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 293069828, + "step": 3022 + }, + { + "epoch": 0.2955612045365663, + "grad_norm": 15.459850878217855, + "learning_rate": 5e-05, + "loss": 0.0703, + "num_input_tokens_seen": 293167248, + "step": 3023 + }, + { + "epoch": 0.2955612045365663, + "loss": 0.07521186023950577, + "loss_ce": 0.004174564965069294, + "loss_iou": 0.3828125, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 293167248, + "step": 3023 + }, + { + "epoch": 0.29565897536175206, + "grad_norm": 17.155330073491353, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 293264008, + "step": 3024 + }, + { + "epoch": 0.29565897536175206, + "loss": 0.09100653976202011, + "loss_ce": 0.006076123099774122, + "loss_iou": 0.34375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 293264008, + "step": 3024 + }, + { + "epoch": 0.2957567461869378, + "grad_norm": 6.882913411662235, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 293360660, + "step": 3025 + }, + { + "epoch": 0.2957567461869378, + "loss": 0.08183833956718445, + "loss_ce": 0.0060479361563920975, + "loss_iou": 0.2236328125, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 293360660, + "step": 3025 + }, + { + "epoch": 0.29585451701212356, + "grad_norm": 6.545567185201839, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 293457376, + "step": 3026 + }, + { + "epoch": 0.29585451701212356, + "loss": 0.10224729776382446, + "loss_ce": 0.0044994959607720375, + "loss_iou": 0.2734375, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 293457376, + "step": 3026 + }, + { + "epoch": 0.29595228783730937, + "grad_norm": 8.413151969842835, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 293554464, + "step": 3027 + }, + { + "epoch": 0.29595228783730937, + "loss": 0.08990877866744995, + "loss_ce": 0.00684756226837635, + "loss_iou": 0.318359375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 293554464, + "step": 3027 + }, + { + "epoch": 0.2960500586624951, + "grad_norm": 6.18010913583783, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 293650216, + "step": 3028 + }, + { + "epoch": 0.2960500586624951, + "loss": 0.0484326034784317, + "loss_ce": 0.007611528504639864, + "loss_iou": 0.291015625, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 293650216, + "step": 3028 + }, + { + "epoch": 0.29614782948768087, + "grad_norm": 10.233612212762036, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 293747168, + "step": 3029 + }, + { + "epoch": 0.29614782948768087, + "loss": 0.07923425734043121, + "loss_ce": 0.005183352157473564, + "loss_iou": 0.33203125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 293747168, + "step": 3029 + }, + { + "epoch": 0.2962456003128666, + "grad_norm": 30.45728215387318, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 293844436, + "step": 3030 + }, + { + "epoch": 0.2962456003128666, + "loss": 0.08432930707931519, + "loss_ce": 0.003106769174337387, + "loss_iou": 0.330078125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 293844436, + "step": 3030 + }, + { + "epoch": 0.2963433711380524, + "grad_norm": 9.729994404469853, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 293941856, + "step": 3031 + }, + { + "epoch": 0.2963433711380524, + "loss": 0.051581576466560364, + "loss_ce": 0.0034706152509897947, + "loss_iou": 0.248046875, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 293941856, + "step": 3031 + }, + { + "epoch": 0.2964411419632382, + "grad_norm": 3.9451940775322947, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 294039104, + "step": 3032 + }, + { + "epoch": 0.2964411419632382, + "loss": 0.07468906790018082, + "loss_ce": 0.0041476828046143055, + "loss_iou": 0.40625, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 294039104, + "step": 3032 + }, + { + "epoch": 0.2965389127884239, + "grad_norm": 4.015820563693909, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 294136076, + "step": 3033 + }, + { + "epoch": 0.2965389127884239, + "loss": 0.0862262099981308, + "loss_ce": 0.004873979836702347, + "loss_iou": 0.326171875, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 294136076, + "step": 3033 + }, + { + "epoch": 0.2966366836136097, + "grad_norm": 9.607334739098372, + "learning_rate": 5e-05, + "loss": 0.0614, + "num_input_tokens_seen": 294234088, + "step": 3034 + }, + { + "epoch": 0.2966366836136097, + "loss": 0.05376685783267021, + "loss_ce": 0.004259715788066387, + "loss_iou": 0.28125, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 294234088, + "step": 3034 + }, + { + "epoch": 0.2967344544387955, + "grad_norm": 9.729249735559533, + "learning_rate": 5e-05, + "loss": 0.1041, + "num_input_tokens_seen": 294330308, + "step": 3035 + }, + { + "epoch": 0.2967344544387955, + "loss": 0.12191865593194962, + "loss_ce": 0.005860304459929466, + "loss_iou": 0.37109375, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 294330308, + "step": 3035 + }, + { + "epoch": 0.29683222526398123, + "grad_norm": 9.023228599650766, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 294426732, + "step": 3036 + }, + { + "epoch": 0.29683222526398123, + "loss": 0.08249248564243317, + "loss_ce": 0.005344049073755741, + "loss_iou": 0.314453125, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 294426732, + "step": 3036 + }, + { + "epoch": 0.296929996089167, + "grad_norm": 16.422063198504187, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 294523432, + "step": 3037 + }, + { + "epoch": 0.296929996089167, + "loss": 0.05680471286177635, + "loss_ce": 0.0028496356680989265, + "loss_iou": 0.328125, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 294523432, + "step": 3037 + }, + { + "epoch": 0.29702776691435273, + "grad_norm": 9.821827796712393, + "learning_rate": 5e-05, + "loss": 0.1106, + "num_input_tokens_seen": 294619320, + "step": 3038 + }, + { + "epoch": 0.29702776691435273, + "loss": 0.1631486713886261, + "loss_ce": 0.003968989010900259, + "loss_iou": 0.396484375, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 294619320, + "step": 3038 + }, + { + "epoch": 0.29712553773953854, + "grad_norm": 4.845818525832918, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 294716112, + "step": 3039 + }, + { + "epoch": 0.29712553773953854, + "loss": 0.11282453685998917, + "loss_ce": 0.001587962033227086, + "loss_iou": 0.30078125, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 294716112, + "step": 3039 + }, + { + "epoch": 0.2972233085647243, + "grad_norm": 5.247500483678064, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 294813020, + "step": 3040 + }, + { + "epoch": 0.2972233085647243, + "loss": 0.08972213417291641, + "loss_ce": 0.0032353149726986885, + "loss_iou": 0.287109375, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 294813020, + "step": 3040 + }, + { + "epoch": 0.29732107938991004, + "grad_norm": 4.478033694713018, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 294910672, + "step": 3041 + }, + { + "epoch": 0.29732107938991004, + "loss": 0.06294244527816772, + "loss_ce": 0.0056304289028048515, + "loss_iou": 0.400390625, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 294910672, + "step": 3041 + }, + { + "epoch": 0.29741885021509584, + "grad_norm": 2.6362861677646308, + "learning_rate": 5e-05, + "loss": 0.0906, + "num_input_tokens_seen": 295007568, + "step": 3042 + }, + { + "epoch": 0.29741885021509584, + "loss": 0.10501284897327423, + "loss_ce": 0.0038013048470020294, + "loss_iou": 0.3359375, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 295007568, + "step": 3042 + }, + { + "epoch": 0.2975166210402816, + "grad_norm": 5.294977969905846, + "learning_rate": 5e-05, + "loss": 0.1011, + "num_input_tokens_seen": 295105228, + "step": 3043 + }, + { + "epoch": 0.2975166210402816, + "loss": 0.10359402000904083, + "loss_ce": 0.0013906497042626143, + "loss_iou": 0.380859375, + "loss_num": 0.0205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 295105228, + "step": 3043 + }, + { + "epoch": 0.29761439186546734, + "grad_norm": 8.97411071692243, + "learning_rate": 5e-05, + "loss": 0.0703, + "num_input_tokens_seen": 295202204, + "step": 3044 + }, + { + "epoch": 0.29761439186546734, + "loss": 0.05042263865470886, + "loss_ce": 0.0043868692591786385, + "loss_iou": 0.263671875, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 295202204, + "step": 3044 + }, + { + "epoch": 0.2977121626906531, + "grad_norm": 9.842016467427152, + "learning_rate": 5e-05, + "loss": 0.099, + "num_input_tokens_seen": 295298768, + "step": 3045 + }, + { + "epoch": 0.2977121626906531, + "loss": 0.11134198307991028, + "loss_ce": 0.0020585369784384966, + "loss_iou": 0.359375, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 295298768, + "step": 3045 + }, + { + "epoch": 0.2978099335158389, + "grad_norm": 9.250470432065512, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 295395596, + "step": 3046 + }, + { + "epoch": 0.2978099335158389, + "loss": 0.128673255443573, + "loss_ce": 0.014506988227367401, + "loss_iou": 0.357421875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 295395596, + "step": 3046 + }, + { + "epoch": 0.29790770434102465, + "grad_norm": 13.523784914886827, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 295492896, + "step": 3047 + }, + { + "epoch": 0.29790770434102465, + "loss": 0.0678701102733612, + "loss_ce": 0.0042199729941785336, + "loss_iou": 0.2353515625, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 295492896, + "step": 3047 + }, + { + "epoch": 0.2980054751662104, + "grad_norm": 4.108500973930889, + "learning_rate": 5e-05, + "loss": 0.0546, + "num_input_tokens_seen": 295589104, + "step": 3048 + }, + { + "epoch": 0.2980054751662104, + "loss": 0.06494991481304169, + "loss_ce": 0.00463192630559206, + "loss_iou": 0.2421875, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 295589104, + "step": 3048 + }, + { + "epoch": 0.29810324599139615, + "grad_norm": 2.4595801514340767, + "learning_rate": 5e-05, + "loss": 0.0925, + "num_input_tokens_seen": 295687288, + "step": 3049 + }, + { + "epoch": 0.29810324599139615, + "loss": 0.12033876031637192, + "loss_ce": 0.008201923221349716, + "loss_iou": 0.369140625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 295687288, + "step": 3049 + }, + { + "epoch": 0.29820101681658195, + "grad_norm": 10.730741917999246, + "learning_rate": 5e-05, + "loss": 0.1037, + "num_input_tokens_seen": 295784596, + "step": 3050 + }, + { + "epoch": 0.29820101681658195, + "loss": 0.07408903539180756, + "loss_ce": 0.004596695303916931, + "loss_iou": 0.33203125, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 295784596, + "step": 3050 + }, + { + "epoch": 0.2982987876417677, + "grad_norm": 12.695068000713134, + "learning_rate": 5e-05, + "loss": 0.1097, + "num_input_tokens_seen": 295881512, + "step": 3051 + }, + { + "epoch": 0.2982987876417677, + "loss": 0.0882435292005539, + "loss_ce": 0.005937620066106319, + "loss_iou": 0.3125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 295881512, + "step": 3051 + }, + { + "epoch": 0.29839655846695345, + "grad_norm": 19.213720886566918, + "learning_rate": 5e-05, + "loss": 0.1122, + "num_input_tokens_seen": 295978772, + "step": 3052 + }, + { + "epoch": 0.29839655846695345, + "loss": 0.1042870432138443, + "loss_ce": 0.005928894504904747, + "loss_iou": 0.36328125, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 295978772, + "step": 3052 + }, + { + "epoch": 0.2984943292921392, + "grad_norm": 30.184870982123385, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 296075752, + "step": 3053 + }, + { + "epoch": 0.2984943292921392, + "loss": 0.07480459660291672, + "loss_ce": 0.007894808426499367, + "loss_iou": 0.34375, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 296075752, + "step": 3053 + }, + { + "epoch": 0.298592100117325, + "grad_norm": 10.969436186609881, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 296173200, + "step": 3054 + }, + { + "epoch": 0.298592100117325, + "loss": 0.06987336277961731, + "loss_ce": 0.005008247680962086, + "loss_iou": 0.357421875, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 296173200, + "step": 3054 + }, + { + "epoch": 0.29868987094251076, + "grad_norm": 15.854534854088959, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 296269816, + "step": 3055 + }, + { + "epoch": 0.29868987094251076, + "loss": 0.07881200313568115, + "loss_ce": 0.0017856310587376356, + "loss_iou": 0.357421875, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 296269816, + "step": 3055 + }, + { + "epoch": 0.2987876417676965, + "grad_norm": 5.92391046237054, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 296367348, + "step": 3056 + }, + { + "epoch": 0.2987876417676965, + "loss": 0.07733490318059921, + "loss_ce": 0.004260563291609287, + "loss_iou": 0.4375, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 296367348, + "step": 3056 + }, + { + "epoch": 0.29888541259288226, + "grad_norm": 8.98454748930653, + "learning_rate": 5e-05, + "loss": 0.1008, + "num_input_tokens_seen": 296463928, + "step": 3057 + }, + { + "epoch": 0.29888541259288226, + "loss": 0.10204342007637024, + "loss_ce": 0.008491787128150463, + "loss_iou": 0.341796875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 296463928, + "step": 3057 + }, + { + "epoch": 0.29898318341806807, + "grad_norm": 16.231387525838436, + "learning_rate": 5e-05, + "loss": 0.0588, + "num_input_tokens_seen": 296560576, + "step": 3058 + }, + { + "epoch": 0.29898318341806807, + "loss": 0.07514055073261261, + "loss_ce": 0.00569780170917511, + "loss_iou": 0.259765625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 296560576, + "step": 3058 + }, + { + "epoch": 0.2990809542432538, + "grad_norm": 1.7518364530363155, + "learning_rate": 5e-05, + "loss": 0.0929, + "num_input_tokens_seen": 296657308, + "step": 3059 + }, + { + "epoch": 0.2990809542432538, + "loss": 0.08855992555618286, + "loss_ce": 0.0023477617651224136, + "loss_iou": 0.291015625, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 296657308, + "step": 3059 + }, + { + "epoch": 0.29917872506843957, + "grad_norm": 6.203747194881082, + "learning_rate": 5e-05, + "loss": 0.0677, + "num_input_tokens_seen": 296753424, + "step": 3060 + }, + { + "epoch": 0.29917872506843957, + "loss": 0.0626310259103775, + "loss_ce": 0.001901047071442008, + "loss_iou": 0.298828125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 296753424, + "step": 3060 + }, + { + "epoch": 0.2992764958936253, + "grad_norm": 3.2855582541199073, + "learning_rate": 5e-05, + "loss": 0.0615, + "num_input_tokens_seen": 296850056, + "step": 3061 + }, + { + "epoch": 0.2992764958936253, + "loss": 0.08167918026447296, + "loss_ce": 0.003584702033549547, + "loss_iou": 0.2734375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 296850056, + "step": 3061 + }, + { + "epoch": 0.2993742667188111, + "grad_norm": 24.73764701871769, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 296947168, + "step": 3062 + }, + { + "epoch": 0.2993742667188111, + "loss": 0.06999073922634125, + "loss_ce": 0.00254688854329288, + "loss_iou": 0.28515625, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 296947168, + "step": 3062 + }, + { + "epoch": 0.29947203754399687, + "grad_norm": 34.453439120231884, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 297043776, + "step": 3063 + }, + { + "epoch": 0.29947203754399687, + "loss": 0.06516994535923004, + "loss_ce": 0.0012356238439679146, + "loss_iou": 0.251953125, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 297043776, + "step": 3063 + }, + { + "epoch": 0.2995698083691826, + "grad_norm": 16.410173532057453, + "learning_rate": 5e-05, + "loss": 0.0989, + "num_input_tokens_seen": 297140816, + "step": 3064 + }, + { + "epoch": 0.2995698083691826, + "loss": 0.09150601923465729, + "loss_ce": 0.002902047010138631, + "loss_iou": 0.3125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 297140816, + "step": 3064 + }, + { + "epoch": 0.2996675791943684, + "grad_norm": 4.897311245481428, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 297238652, + "step": 3065 + }, + { + "epoch": 0.2996675791943684, + "loss": 0.07387037575244904, + "loss_ce": 0.008730602450668812, + "loss_iou": 0.37109375, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 297238652, + "step": 3065 + }, + { + "epoch": 0.2997653500195542, + "grad_norm": 12.681417387125027, + "learning_rate": 5e-05, + "loss": 0.1481, + "num_input_tokens_seen": 297336100, + "step": 3066 + }, + { + "epoch": 0.2997653500195542, + "loss": 0.1582365185022354, + "loss_ce": 0.007296569645404816, + "loss_iou": 0.400390625, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 297336100, + "step": 3066 + }, + { + "epoch": 0.2998631208447399, + "grad_norm": 3.189568412367874, + "learning_rate": 5e-05, + "loss": 0.0979, + "num_input_tokens_seen": 297433580, + "step": 3067 + }, + { + "epoch": 0.2998631208447399, + "loss": 0.06786826997995377, + "loss_ce": 0.002652205526828766, + "loss_iou": 0.47265625, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 297433580, + "step": 3067 + }, + { + "epoch": 0.2999608916699257, + "grad_norm": 9.395354866159115, + "learning_rate": 5e-05, + "loss": 0.0854, + "num_input_tokens_seen": 297529488, + "step": 3068 + }, + { + "epoch": 0.2999608916699257, + "loss": 0.06218036264181137, + "loss_ce": 0.005585513077676296, + "loss_iou": 0.31640625, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 297529488, + "step": 3068 + }, + { + "epoch": 0.3000586624951115, + "grad_norm": 24.0195450398802, + "learning_rate": 5e-05, + "loss": 0.071, + "num_input_tokens_seen": 297627664, + "step": 3069 + }, + { + "epoch": 0.3000586624951115, + "loss": 0.06128115952014923, + "loss_ce": 0.007127718068659306, + "loss_iou": 0.361328125, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 297627664, + "step": 3069 + }, + { + "epoch": 0.30015643332029723, + "grad_norm": 30.775769326941205, + "learning_rate": 5e-05, + "loss": 0.1034, + "num_input_tokens_seen": 297724944, + "step": 3070 + }, + { + "epoch": 0.30015643332029723, + "loss": 0.05376912280917168, + "loss_ce": 0.004055988974869251, + "loss_iou": 0.3359375, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 297724944, + "step": 3070 + }, + { + "epoch": 0.300254204145483, + "grad_norm": 18.564861837205946, + "learning_rate": 5e-05, + "loss": 0.114, + "num_input_tokens_seen": 297822336, + "step": 3071 + }, + { + "epoch": 0.300254204145483, + "loss": 0.12246339023113251, + "loss_ce": 0.004970707464963198, + "loss_iou": 0.302734375, + "loss_num": 0.0234375, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 297822336, + "step": 3071 + }, + { + "epoch": 0.30035197497066873, + "grad_norm": 2.0185811936099207, + "learning_rate": 5e-05, + "loss": 0.0453, + "num_input_tokens_seen": 297919332, + "step": 3072 + }, + { + "epoch": 0.30035197497066873, + "loss": 0.05659199133515358, + "loss_ce": 0.002644540974870324, + "loss_iou": 0.353515625, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 297919332, + "step": 3072 + }, + { + "epoch": 0.30044974579585454, + "grad_norm": 8.226966220667855, + "learning_rate": 5e-05, + "loss": 0.1333, + "num_input_tokens_seen": 298015548, + "step": 3073 + }, + { + "epoch": 0.30044974579585454, + "loss": 0.1281425505876541, + "loss_ce": 0.011748509481549263, + "loss_iou": 0.232421875, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 298015548, + "step": 3073 + }, + { + "epoch": 0.3005475166210403, + "grad_norm": 14.828646580651307, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 298112164, + "step": 3074 + }, + { + "epoch": 0.3005475166210403, + "loss": 0.08877521753311157, + "loss_ce": 0.005111269652843475, + "loss_iou": 0.3203125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 298112164, + "step": 3074 + }, + { + "epoch": 0.30064528744622604, + "grad_norm": 5.7843587671699535, + "learning_rate": 5e-05, + "loss": 0.1124, + "num_input_tokens_seen": 298208856, + "step": 3075 + }, + { + "epoch": 0.30064528744622604, + "loss": 0.1336909383535385, + "loss_ce": 0.0069361780770123005, + "loss_iou": 0.359375, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 298208856, + "step": 3075 + }, + { + "epoch": 0.3007430582714118, + "grad_norm": 3.0697109766133917, + "learning_rate": 5e-05, + "loss": 0.0662, + "num_input_tokens_seen": 298305224, + "step": 3076 + }, + { + "epoch": 0.3007430582714118, + "loss": 0.05080212652683258, + "loss_ce": 0.008130920119583607, + "loss_iou": 0.322265625, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 298305224, + "step": 3076 + }, + { + "epoch": 0.3008408290965976, + "grad_norm": 5.674629110900616, + "learning_rate": 5e-05, + "loss": 0.1114, + "num_input_tokens_seen": 298402068, + "step": 3077 + }, + { + "epoch": 0.3008408290965976, + "loss": 0.11329938471317291, + "loss_ce": 0.004717841744422913, + "loss_iou": 0.396484375, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 298402068, + "step": 3077 + }, + { + "epoch": 0.30093859992178335, + "grad_norm": 5.809384542242858, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 298498436, + "step": 3078 + }, + { + "epoch": 0.30093859992178335, + "loss": 0.07795412093400955, + "loss_ce": 0.006604018621146679, + "loss_iou": 0.3359375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 298498436, + "step": 3078 + }, + { + "epoch": 0.3010363707469691, + "grad_norm": 8.485808749680322, + "learning_rate": 5e-05, + "loss": 0.0925, + "num_input_tokens_seen": 298594976, + "step": 3079 + }, + { + "epoch": 0.3010363707469691, + "loss": 0.09149688482284546, + "loss_ce": 0.004826962482184172, + "loss_iou": 0.302734375, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 298594976, + "step": 3079 + }, + { + "epoch": 0.30113414157215485, + "grad_norm": 9.032297358748812, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 298691984, + "step": 3080 + }, + { + "epoch": 0.30113414157215485, + "loss": 0.07942214608192444, + "loss_ce": 0.00552383903414011, + "loss_iou": 0.31640625, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 298691984, + "step": 3080 + }, + { + "epoch": 0.30123191239734065, + "grad_norm": 15.271836130922798, + "learning_rate": 5e-05, + "loss": 0.0703, + "num_input_tokens_seen": 298790144, + "step": 3081 + }, + { + "epoch": 0.30123191239734065, + "loss": 0.07669989764690399, + "loss_ce": 0.006997755728662014, + "loss_iou": 0.412109375, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 298790144, + "step": 3081 + }, + { + "epoch": 0.3013296832225264, + "grad_norm": 6.788760372453667, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 298886872, + "step": 3082 + }, + { + "epoch": 0.3013296832225264, + "loss": 0.09002657979726791, + "loss_ce": 0.009017674252390862, + "loss_iou": 0.263671875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 298886872, + "step": 3082 + }, + { + "epoch": 0.30142745404771215, + "grad_norm": 2.695046809409775, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 298983736, + "step": 3083 + }, + { + "epoch": 0.30142745404771215, + "loss": 0.044836848974227905, + "loss_ce": 0.006254999898374081, + "loss_iou": 0.4140625, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 298983736, + "step": 3083 + }, + { + "epoch": 0.3015252248728979, + "grad_norm": 4.00352817153514, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 299080660, + "step": 3084 + }, + { + "epoch": 0.3015252248728979, + "loss": 0.09969139099121094, + "loss_ce": 0.0017909979214891791, + "loss_iou": 0.251953125, + "loss_num": 0.0196533203125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 299080660, + "step": 3084 + }, + { + "epoch": 0.3016229956980837, + "grad_norm": 4.385017654212852, + "learning_rate": 5e-05, + "loss": 0.0655, + "num_input_tokens_seen": 299177792, + "step": 3085 + }, + { + "epoch": 0.3016229956980837, + "loss": 0.0820767730474472, + "loss_ce": 0.011527767404913902, + "loss_iou": 0.2578125, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 299177792, + "step": 3085 + }, + { + "epoch": 0.30172076652326946, + "grad_norm": 8.431331442027028, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 299275352, + "step": 3086 + }, + { + "epoch": 0.30172076652326946, + "loss": 0.07056894898414612, + "loss_ce": 0.007964042015373707, + "loss_iou": 0.287109375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 299275352, + "step": 3086 + }, + { + "epoch": 0.3018185373484552, + "grad_norm": 3.4835767654815935, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 299372052, + "step": 3087 + }, + { + "epoch": 0.3018185373484552, + "loss": 0.08580619096755981, + "loss_ce": 0.003500275779515505, + "loss_iou": 0.326171875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 299372052, + "step": 3087 + }, + { + "epoch": 0.301916308173641, + "grad_norm": 9.954563915482112, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 299468792, + "step": 3088 + }, + { + "epoch": 0.301916308173641, + "loss": 0.071570485830307, + "loss_ce": 0.0029059364460408688, + "loss_iou": 0.33203125, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 299468792, + "step": 3088 + }, + { + "epoch": 0.30201407899882676, + "grad_norm": 45.99541739698542, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 299566168, + "step": 3089 + }, + { + "epoch": 0.30201407899882676, + "loss": 0.0716058686375618, + "loss_ce": 0.004711337387561798, + "loss_iou": 0.33203125, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 299566168, + "step": 3089 + }, + { + "epoch": 0.3021118498240125, + "grad_norm": 50.81563019764911, + "learning_rate": 5e-05, + "loss": 0.1147, + "num_input_tokens_seen": 299664084, + "step": 3090 + }, + { + "epoch": 0.3021118498240125, + "loss": 0.12188442051410675, + "loss_ce": 0.006444053258746862, + "loss_iou": 0.29296875, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 299664084, + "step": 3090 + }, + { + "epoch": 0.30220962064919826, + "grad_norm": 17.661420152606386, + "learning_rate": 5e-05, + "loss": 0.1331, + "num_input_tokens_seen": 299760484, + "step": 3091 + }, + { + "epoch": 0.30220962064919826, + "loss": 0.14533358812332153, + "loss_ce": 0.007333112880587578, + "loss_iou": 0.33984375, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 299760484, + "step": 3091 + }, + { + "epoch": 0.30230739147438407, + "grad_norm": 5.011721464522185, + "learning_rate": 5e-05, + "loss": 0.0946, + "num_input_tokens_seen": 299857352, + "step": 3092 + }, + { + "epoch": 0.30230739147438407, + "loss": 0.11289352178573608, + "loss_ce": 0.0031523124780505896, + "loss_iou": 0.361328125, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 299857352, + "step": 3092 + }, + { + "epoch": 0.3024051622995698, + "grad_norm": 13.612746290967074, + "learning_rate": 5e-05, + "loss": 0.1033, + "num_input_tokens_seen": 299954076, + "step": 3093 + }, + { + "epoch": 0.3024051622995698, + "loss": 0.13887420296669006, + "loss_ce": 0.00447479821741581, + "loss_iou": 0.296875, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 299954076, + "step": 3093 + }, + { + "epoch": 0.30250293312475557, + "grad_norm": 6.195030765983373, + "learning_rate": 5e-05, + "loss": 0.1037, + "num_input_tokens_seen": 300051248, + "step": 3094 + }, + { + "epoch": 0.30250293312475557, + "loss": 0.0960787832736969, + "loss_ce": 0.006814868189394474, + "loss_iou": 0.361328125, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 300051248, + "step": 3094 + }, + { + "epoch": 0.3026007039499413, + "grad_norm": 3.51556708074643, + "learning_rate": 5e-05, + "loss": 0.0967, + "num_input_tokens_seen": 300147980, + "step": 3095 + }, + { + "epoch": 0.3026007039499413, + "loss": 0.10096681118011475, + "loss_ce": 0.00955140683799982, + "loss_iou": 0.306640625, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 300147980, + "step": 3095 + }, + { + "epoch": 0.3026984747751271, + "grad_norm": 3.001373635966785, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 300244804, + "step": 3096 + }, + { + "epoch": 0.3026984747751271, + "loss": 0.03201230987906456, + "loss_ce": 0.0037644742988049984, + "loss_iou": 0.27734375, + "loss_num": 0.005645751953125, + "loss_xval": 0.0281982421875, + "num_input_tokens_seen": 300244804, + "step": 3096 + }, + { + "epoch": 0.3027962456003129, + "grad_norm": 6.676414145482318, + "learning_rate": 5e-05, + "loss": 0.0863, + "num_input_tokens_seen": 300341420, + "step": 3097 + }, + { + "epoch": 0.3027962456003129, + "loss": 0.09973222017288208, + "loss_ce": 0.004212196916341782, + "loss_iou": 0.28515625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 300341420, + "step": 3097 + }, + { + "epoch": 0.3028940164254986, + "grad_norm": 16.192995056418447, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 300439036, + "step": 3098 + }, + { + "epoch": 0.3028940164254986, + "loss": 0.07491080462932587, + "loss_ce": 0.006482764147222042, + "loss_iou": 0.283203125, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 300439036, + "step": 3098 + }, + { + "epoch": 0.3029917872506844, + "grad_norm": 13.307573752585931, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 300535868, + "step": 3099 + }, + { + "epoch": 0.3029917872506844, + "loss": 0.10491478443145752, + "loss_ce": 0.009242177940905094, + "loss_iou": 0.39453125, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 300535868, + "step": 3099 + }, + { + "epoch": 0.3030895580758702, + "grad_norm": 5.202674503355935, + "learning_rate": 5e-05, + "loss": 0.0576, + "num_input_tokens_seen": 300632332, + "step": 3100 + }, + { + "epoch": 0.3030895580758702, + "loss": 0.07173121720552444, + "loss_ce": 0.004722245037555695, + "loss_iou": 0.30859375, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 300632332, + "step": 3100 + }, + { + "epoch": 0.30318732890105593, + "grad_norm": 2.6867468021020047, + "learning_rate": 5e-05, + "loss": 0.0949, + "num_input_tokens_seen": 300729320, + "step": 3101 + }, + { + "epoch": 0.30318732890105593, + "loss": 0.14542457461357117, + "loss_ce": 0.0042349970899522305, + "loss_iou": 0.3203125, + "loss_num": 0.0281982421875, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 300729320, + "step": 3101 + }, + { + "epoch": 0.3032850997262417, + "grad_norm": 4.648061516121004, + "learning_rate": 5e-05, + "loss": 0.094, + "num_input_tokens_seen": 300826424, + "step": 3102 + }, + { + "epoch": 0.3032850997262417, + "loss": 0.07071300595998764, + "loss_ce": 0.021263087168335915, + "loss_iou": 0.263671875, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 300826424, + "step": 3102 + }, + { + "epoch": 0.30338287055142743, + "grad_norm": 5.188442136900408, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 300923560, + "step": 3103 + }, + { + "epoch": 0.30338287055142743, + "loss": 0.0797957330942154, + "loss_ce": 0.002326857764273882, + "loss_iou": 0.3671875, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 300923560, + "step": 3103 + }, + { + "epoch": 0.30348064137661324, + "grad_norm": 4.111603657642593, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 301021332, + "step": 3104 + }, + { + "epoch": 0.30348064137661324, + "loss": 0.08034412562847137, + "loss_ce": 0.0047826035879552364, + "loss_iou": 0.388671875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 301021332, + "step": 3104 + }, + { + "epoch": 0.303578412201799, + "grad_norm": 4.229445306125361, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 301118552, + "step": 3105 + }, + { + "epoch": 0.303578412201799, + "loss": 0.06937375664710999, + "loss_ce": 0.00693479971960187, + "loss_iou": 0.30859375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 301118552, + "step": 3105 + }, + { + "epoch": 0.30367618302698474, + "grad_norm": 6.956759690352004, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 301215236, + "step": 3106 + }, + { + "epoch": 0.30367618302698474, + "loss": 0.07763499021530151, + "loss_ce": 0.001493627205491066, + "loss_iou": 0.361328125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 301215236, + "step": 3106 + }, + { + "epoch": 0.3037739538521705, + "grad_norm": 7.645611295167096, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 301311768, + "step": 3107 + }, + { + "epoch": 0.3037739538521705, + "loss": 0.10393928736448288, + "loss_ce": 0.00454354053363204, + "loss_iou": 0.2451171875, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 301311768, + "step": 3107 + }, + { + "epoch": 0.3038717246773563, + "grad_norm": 6.238516017509944, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 301409352, + "step": 3108 + }, + { + "epoch": 0.3038717246773563, + "loss": 0.07102040201425552, + "loss_ce": 0.0039503974840044975, + "loss_iou": 0.3046875, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 301409352, + "step": 3108 + }, + { + "epoch": 0.30396949550254204, + "grad_norm": 4.4947634067321225, + "learning_rate": 5e-05, + "loss": 0.0584, + "num_input_tokens_seen": 301506176, + "step": 3109 + }, + { + "epoch": 0.30396949550254204, + "loss": 0.047011226415634155, + "loss_ce": 0.004137840121984482, + "loss_iou": 0.2421875, + "loss_num": 0.008544921875, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 301506176, + "step": 3109 + }, + { + "epoch": 0.3040672663277278, + "grad_norm": 7.1773058635921405, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 301602976, + "step": 3110 + }, + { + "epoch": 0.3040672663277278, + "loss": 0.08683217316865921, + "loss_ce": 0.0075475070625543594, + "loss_iou": 0.267578125, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 301602976, + "step": 3110 + }, + { + "epoch": 0.3041650371529136, + "grad_norm": 16.96826268734587, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 301700636, + "step": 3111 + }, + { + "epoch": 0.3041650371529136, + "loss": 0.10805687308311462, + "loss_ce": 0.0059145363047719, + "loss_iou": 0.232421875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 301700636, + "step": 3111 + }, + { + "epoch": 0.30426280797809935, + "grad_norm": 5.556211315745372, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 301797804, + "step": 3112 + }, + { + "epoch": 0.30426280797809935, + "loss": 0.06882639229297638, + "loss_ce": 0.0028473930433392525, + "loss_iou": 0.3671875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 301797804, + "step": 3112 + }, + { + "epoch": 0.3043605788032851, + "grad_norm": 4.791381490165524, + "learning_rate": 5e-05, + "loss": 0.1265, + "num_input_tokens_seen": 301895040, + "step": 3113 + }, + { + "epoch": 0.3043605788032851, + "loss": 0.16377754509449005, + "loss_ce": 0.006703566759824753, + "loss_iou": 0.3125, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 301895040, + "step": 3113 + }, + { + "epoch": 0.30445834962847085, + "grad_norm": 20.391431015730422, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 301993016, + "step": 3114 + }, + { + "epoch": 0.30445834962847085, + "loss": 0.07322610914707184, + "loss_ce": 0.007338659837841988, + "loss_iou": 0.373046875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 301993016, + "step": 3114 + }, + { + "epoch": 0.30455612045365665, + "grad_norm": 33.32184689110883, + "learning_rate": 5e-05, + "loss": 0.1261, + "num_input_tokens_seen": 302089504, + "step": 3115 + }, + { + "epoch": 0.30455612045365665, + "loss": 0.10118414461612701, + "loss_ce": 0.004321351647377014, + "loss_iou": 0.322265625, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 302089504, + "step": 3115 + }, + { + "epoch": 0.3046538912788424, + "grad_norm": 16.913878260469097, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 302185944, + "step": 3116 + }, + { + "epoch": 0.3046538912788424, + "loss": 0.05855955928564072, + "loss_ce": 0.004970693960785866, + "loss_iou": 0.357421875, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 302185944, + "step": 3116 + }, + { + "epoch": 0.30475166210402815, + "grad_norm": 9.659904854869913, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 302283100, + "step": 3117 + }, + { + "epoch": 0.30475166210402815, + "loss": 0.08351701498031616, + "loss_ce": 0.002920093946158886, + "loss_iou": 0.337890625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 302283100, + "step": 3117 + }, + { + "epoch": 0.3048494329292139, + "grad_norm": 8.81199372923945, + "learning_rate": 5e-05, + "loss": 0.0689, + "num_input_tokens_seen": 302379796, + "step": 3118 + }, + { + "epoch": 0.3048494329292139, + "loss": 0.05768836289644241, + "loss_ce": 0.0037027685903012753, + "loss_iou": 0.30078125, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 302379796, + "step": 3118 + }, + { + "epoch": 0.3049472037543997, + "grad_norm": 23.060808681239145, + "learning_rate": 5e-05, + "loss": 0.1294, + "num_input_tokens_seen": 302478588, + "step": 3119 + }, + { + "epoch": 0.3049472037543997, + "loss": 0.14691181480884552, + "loss_ce": 0.013061719946563244, + "loss_iou": 0.359375, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 302478588, + "step": 3119 + }, + { + "epoch": 0.30504497457958546, + "grad_norm": 35.74187309006607, + "learning_rate": 5e-05, + "loss": 0.0742, + "num_input_tokens_seen": 302575672, + "step": 3120 + }, + { + "epoch": 0.30504497457958546, + "loss": 0.08166079223155975, + "loss_ce": 0.005435514729470015, + "loss_iou": 0.35546875, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 302575672, + "step": 3120 + }, + { + "epoch": 0.3051427454047712, + "grad_norm": 10.26022999042282, + "learning_rate": 5e-05, + "loss": 0.123, + "num_input_tokens_seen": 302672188, + "step": 3121 + }, + { + "epoch": 0.3051427454047712, + "loss": 0.13592851161956787, + "loss_ce": 0.009280555881559849, + "loss_iou": 0.232421875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 302672188, + "step": 3121 + }, + { + "epoch": 0.30524051622995696, + "grad_norm": 3.4200322761335062, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 302768536, + "step": 3122 + }, + { + "epoch": 0.30524051622995696, + "loss": 0.09151487052440643, + "loss_ce": 0.009578987024724483, + "loss_iou": 0.287109375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 302768536, + "step": 3122 + }, + { + "epoch": 0.30533828705514277, + "grad_norm": 10.89878424742336, + "learning_rate": 5e-05, + "loss": 0.1106, + "num_input_tokens_seen": 302865896, + "step": 3123 + }, + { + "epoch": 0.30533828705514277, + "loss": 0.12364844977855682, + "loss_ce": 0.006186290644109249, + "loss_iou": 0.3671875, + "loss_num": 0.0234375, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 302865896, + "step": 3123 + }, + { + "epoch": 0.3054360578803285, + "grad_norm": 4.558987088483099, + "learning_rate": 5e-05, + "loss": 0.1035, + "num_input_tokens_seen": 302963380, + "step": 3124 + }, + { + "epoch": 0.3054360578803285, + "loss": 0.08973149955272675, + "loss_ce": 0.008142752572894096, + "loss_iou": 0.37890625, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 302963380, + "step": 3124 + }, + { + "epoch": 0.30553382870551427, + "grad_norm": 4.326811632434881, + "learning_rate": 5e-05, + "loss": 0.1116, + "num_input_tokens_seen": 303060176, + "step": 3125 + }, + { + "epoch": 0.30553382870551427, + "loss": 0.09395869821310043, + "loss_ce": 0.0020855297334492207, + "loss_iou": 0.404296875, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 303060176, + "step": 3125 + }, + { + "epoch": 0.3056315995307, + "grad_norm": 8.897984854176672, + "learning_rate": 5e-05, + "loss": 0.0863, + "num_input_tokens_seen": 303157328, + "step": 3126 + }, + { + "epoch": 0.3056315995307, + "loss": 0.07753780484199524, + "loss_ce": 0.007240560837090015, + "loss_iou": 0.38671875, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 303157328, + "step": 3126 + }, + { + "epoch": 0.3057293703558858, + "grad_norm": 4.766574929210793, + "learning_rate": 5e-05, + "loss": 0.0922, + "num_input_tokens_seen": 303254432, + "step": 3127 + }, + { + "epoch": 0.3057293703558858, + "loss": 0.10688206553459167, + "loss_ce": 0.005945171695202589, + "loss_iou": 0.35546875, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 303254432, + "step": 3127 + }, + { + "epoch": 0.30582714118107157, + "grad_norm": 6.236228833333526, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 303351552, + "step": 3128 + }, + { + "epoch": 0.30582714118107157, + "loss": 0.08872145414352417, + "loss_ce": 0.00969618372619152, + "loss_iou": 0.373046875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 303351552, + "step": 3128 + }, + { + "epoch": 0.3059249120062573, + "grad_norm": 17.75174216793593, + "learning_rate": 5e-05, + "loss": 0.0999, + "num_input_tokens_seen": 303448972, + "step": 3129 + }, + { + "epoch": 0.3059249120062573, + "loss": 0.1140202134847641, + "loss_ce": 0.004752022679895163, + "loss_iou": 0.373046875, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 303448972, + "step": 3129 + }, + { + "epoch": 0.30602268283144307, + "grad_norm": 11.195184240039609, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 303545776, + "step": 3130 + }, + { + "epoch": 0.30602268283144307, + "loss": 0.0628722682595253, + "loss_ce": 0.0036529116332530975, + "loss_iou": 0.30859375, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 303545776, + "step": 3130 + }, + { + "epoch": 0.3061204536566289, + "grad_norm": 7.226260784110919, + "learning_rate": 5e-05, + "loss": 0.0846, + "num_input_tokens_seen": 303643088, + "step": 3131 + }, + { + "epoch": 0.3061204536566289, + "loss": 0.08013644814491272, + "loss_ce": 0.008649024181067944, + "loss_iou": 0.30078125, + "loss_num": 0.01434326171875, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 303643088, + "step": 3131 + }, + { + "epoch": 0.3062182244818146, + "grad_norm": 5.534963350911159, + "learning_rate": 5e-05, + "loss": 0.1029, + "num_input_tokens_seen": 303739212, + "step": 3132 + }, + { + "epoch": 0.3062182244818146, + "loss": 0.0822494775056839, + "loss_ce": 0.004864528309553862, + "loss_iou": 0.2451171875, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 303739212, + "step": 3132 + }, + { + "epoch": 0.3063159953070004, + "grad_norm": 6.6506101658881756, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 303837212, + "step": 3133 + }, + { + "epoch": 0.3063159953070004, + "loss": 0.051494382321834564, + "loss_ce": 0.00532891508191824, + "loss_iou": 0.353515625, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 303837212, + "step": 3133 + }, + { + "epoch": 0.3064137661321862, + "grad_norm": 8.203610768317747, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 303934764, + "step": 3134 + }, + { + "epoch": 0.3064137661321862, + "loss": 0.05643114447593689, + "loss_ce": 0.0026896900963038206, + "loss_iou": 0.2890625, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 303934764, + "step": 3134 + }, + { + "epoch": 0.30651153695737193, + "grad_norm": 11.911952541434374, + "learning_rate": 5e-05, + "loss": 0.1257, + "num_input_tokens_seen": 304032676, + "step": 3135 + }, + { + "epoch": 0.30651153695737193, + "loss": 0.11800564080476761, + "loss_ce": 0.006219753995537758, + "loss_iou": 0.31640625, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 304032676, + "step": 3135 + }, + { + "epoch": 0.3066093077825577, + "grad_norm": 2.9272348987606707, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 304129212, + "step": 3136 + }, + { + "epoch": 0.3066093077825577, + "loss": 0.07955481112003326, + "loss_ce": 0.0036881130654364824, + "loss_iou": 0.322265625, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 304129212, + "step": 3136 + }, + { + "epoch": 0.30670707860774343, + "grad_norm": 22.300709300274306, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 304225384, + "step": 3137 + }, + { + "epoch": 0.30670707860774343, + "loss": 0.062394265085458755, + "loss_ce": 0.0006057071732357144, + "loss_iou": 0.271484375, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 304225384, + "step": 3137 + }, + { + "epoch": 0.30680484943292924, + "grad_norm": 13.723127251148753, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 304322700, + "step": 3138 + }, + { + "epoch": 0.30680484943292924, + "loss": 0.10875225067138672, + "loss_ce": 0.0034208372235298157, + "loss_iou": 0.361328125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 304322700, + "step": 3138 + }, + { + "epoch": 0.306902620258115, + "grad_norm": 13.943948242116434, + "learning_rate": 5e-05, + "loss": 0.0934, + "num_input_tokens_seen": 304419488, + "step": 3139 + }, + { + "epoch": 0.306902620258115, + "loss": 0.11717386543750763, + "loss_ce": 0.006883344613015652, + "loss_iou": 0.396484375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 304419488, + "step": 3139 + }, + { + "epoch": 0.30700039108330074, + "grad_norm": 7.611485813653946, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 304517144, + "step": 3140 + }, + { + "epoch": 0.30700039108330074, + "loss": 0.10752992331981659, + "loss_ce": 0.009812634438276291, + "loss_iou": 0.37109375, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 304517144, + "step": 3140 + }, + { + "epoch": 0.3070981619084865, + "grad_norm": 11.106427768495438, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 304614008, + "step": 3141 + }, + { + "epoch": 0.3070981619084865, + "loss": 0.09600837528705597, + "loss_ce": 0.00952155515551567, + "loss_iou": 0.328125, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 304614008, + "step": 3141 + }, + { + "epoch": 0.3071959327336723, + "grad_norm": 10.683640124364087, + "learning_rate": 5e-05, + "loss": 0.1127, + "num_input_tokens_seen": 304710928, + "step": 3142 + }, + { + "epoch": 0.3071959327336723, + "loss": 0.11537228524684906, + "loss_ce": 0.0035101003013551235, + "loss_iou": 0.287109375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 304710928, + "step": 3142 + }, + { + "epoch": 0.30729370355885804, + "grad_norm": 22.59121139813485, + "learning_rate": 5e-05, + "loss": 0.0918, + "num_input_tokens_seen": 304807068, + "step": 3143 + }, + { + "epoch": 0.30729370355885804, + "loss": 0.10417724400758743, + "loss_ce": 0.00429321127012372, + "loss_iou": 0.33203125, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 304807068, + "step": 3143 + }, + { + "epoch": 0.3073914743840438, + "grad_norm": 15.17538966390217, + "learning_rate": 5e-05, + "loss": 0.1294, + "num_input_tokens_seen": 304904384, + "step": 3144 + }, + { + "epoch": 0.3073914743840438, + "loss": 0.17670434713363647, + "loss_ce": 0.0078048100695014, + "loss_iou": 0.44140625, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 304904384, + "step": 3144 + }, + { + "epoch": 0.30748924520922954, + "grad_norm": 4.477450552370521, + "learning_rate": 5e-05, + "loss": 0.0977, + "num_input_tokens_seen": 305002680, + "step": 3145 + }, + { + "epoch": 0.30748924520922954, + "loss": 0.1125466525554657, + "loss_ce": 0.005704620387405157, + "loss_iou": 0.314453125, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 305002680, + "step": 3145 + }, + { + "epoch": 0.30758701603441535, + "grad_norm": 7.627044386227768, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 305100300, + "step": 3146 + }, + { + "epoch": 0.30758701603441535, + "loss": 0.07565250992774963, + "loss_ce": 0.0024103152099996805, + "loss_iou": 0.37890625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 305100300, + "step": 3146 + }, + { + "epoch": 0.3076847868596011, + "grad_norm": 15.170159548913512, + "learning_rate": 5e-05, + "loss": 0.1297, + "num_input_tokens_seen": 305196868, + "step": 3147 + }, + { + "epoch": 0.3076847868596011, + "loss": 0.09863235056400299, + "loss_ce": 0.006408231798559427, + "loss_iou": 0.45703125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 305196868, + "step": 3147 + }, + { + "epoch": 0.30778255768478685, + "grad_norm": 7.084614336237808, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 305293476, + "step": 3148 + }, + { + "epoch": 0.30778255768478685, + "loss": 0.1094176322221756, + "loss_ce": 0.005001734010875225, + "loss_iou": 0.357421875, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 305293476, + "step": 3148 + }, + { + "epoch": 0.3078803285099726, + "grad_norm": 29.774662869578087, + "learning_rate": 5e-05, + "loss": 0.0845, + "num_input_tokens_seen": 305390096, + "step": 3149 + }, + { + "epoch": 0.3078803285099726, + "loss": 0.10140714049339294, + "loss_ce": 0.007611365057528019, + "loss_iou": 0.302734375, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 305390096, + "step": 3149 + }, + { + "epoch": 0.3079780993351584, + "grad_norm": 9.223923544404144, + "learning_rate": 5e-05, + "loss": 0.055, + "num_input_tokens_seen": 305486936, + "step": 3150 + }, + { + "epoch": 0.3079780993351584, + "loss": 0.0743722915649414, + "loss_ce": 0.00874424446374178, + "loss_iou": 0.345703125, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 305486936, + "step": 3150 + }, + { + "epoch": 0.30807587016034416, + "grad_norm": 8.599749818091164, + "learning_rate": 5e-05, + "loss": 0.0954, + "num_input_tokens_seen": 305583476, + "step": 3151 + }, + { + "epoch": 0.30807587016034416, + "loss": 0.09409819543361664, + "loss_ce": 0.005215749144554138, + "loss_iou": 0.298828125, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 305583476, + "step": 3151 + }, + { + "epoch": 0.3081736409855299, + "grad_norm": 8.006986620392285, + "learning_rate": 5e-05, + "loss": 0.0974, + "num_input_tokens_seen": 305681100, + "step": 3152 + }, + { + "epoch": 0.3081736409855299, + "loss": 0.07704587280750275, + "loss_ce": 0.0031322992872446775, + "loss_iou": 0.3125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 305681100, + "step": 3152 + }, + { + "epoch": 0.30827141181071566, + "grad_norm": 8.190077958091607, + "learning_rate": 5e-05, + "loss": 0.1213, + "num_input_tokens_seen": 305778348, + "step": 3153 + }, + { + "epoch": 0.30827141181071566, + "loss": 0.10499381273984909, + "loss_ce": 0.006254189182072878, + "loss_iou": 0.326171875, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 305778348, + "step": 3153 + }, + { + "epoch": 0.30836918263590146, + "grad_norm": 7.56395279287829, + "learning_rate": 5e-05, + "loss": 0.1174, + "num_input_tokens_seen": 305876412, + "step": 3154 + }, + { + "epoch": 0.30836918263590146, + "loss": 0.11602143198251724, + "loss_ce": 0.008233346976339817, + "loss_iou": 0.41015625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 305876412, + "step": 3154 + }, + { + "epoch": 0.3084669534610872, + "grad_norm": 4.308720861879482, + "learning_rate": 5e-05, + "loss": 0.1031, + "num_input_tokens_seen": 305973920, + "step": 3155 + }, + { + "epoch": 0.3084669534610872, + "loss": 0.11023359000682831, + "loss_ce": 0.008899969980120659, + "loss_iou": 0.37109375, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 305973920, + "step": 3155 + }, + { + "epoch": 0.30856472428627296, + "grad_norm": 6.574145554408871, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 306071880, + "step": 3156 + }, + { + "epoch": 0.30856472428627296, + "loss": 0.11321307718753815, + "loss_ce": 0.005486029200255871, + "loss_iou": 0.396484375, + "loss_num": 0.021484375, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 306071880, + "step": 3156 + }, + { + "epoch": 0.30866249511145877, + "grad_norm": 3.2840840893444154, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 306169508, + "step": 3157 + }, + { + "epoch": 0.30866249511145877, + "loss": 0.08189469575881958, + "loss_ce": 0.0026710587553679943, + "loss_iou": 0.322265625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 306169508, + "step": 3157 + }, + { + "epoch": 0.3087602659366445, + "grad_norm": 13.633754738906795, + "learning_rate": 5e-05, + "loss": 0.0817, + "num_input_tokens_seen": 306267256, + "step": 3158 + }, + { + "epoch": 0.3087602659366445, + "loss": 0.08330641686916351, + "loss_ce": 0.0013667168095707893, + "loss_iou": 0.4140625, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 306267256, + "step": 3158 + }, + { + "epoch": 0.30885803676183027, + "grad_norm": 4.519029676978829, + "learning_rate": 5e-05, + "loss": 0.0856, + "num_input_tokens_seen": 306363744, + "step": 3159 + }, + { + "epoch": 0.30885803676183027, + "loss": 0.07723484933376312, + "loss_ce": 0.0038705947808921337, + "loss_iou": 0.36328125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 306363744, + "step": 3159 + }, + { + "epoch": 0.308955807587016, + "grad_norm": 24.41050747586753, + "learning_rate": 5e-05, + "loss": 0.1564, + "num_input_tokens_seen": 306460304, + "step": 3160 + }, + { + "epoch": 0.308955807587016, + "loss": 0.1731003075838089, + "loss_ce": 0.006367521360516548, + "loss_iou": 0.2578125, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 306460304, + "step": 3160 + }, + { + "epoch": 0.3090535784122018, + "grad_norm": 9.167247386290665, + "learning_rate": 5e-05, + "loss": 0.0917, + "num_input_tokens_seen": 306557176, + "step": 3161 + }, + { + "epoch": 0.3090535784122018, + "loss": 0.07126779854297638, + "loss_ce": 0.002755837282165885, + "loss_iou": 0.3359375, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 306557176, + "step": 3161 + }, + { + "epoch": 0.3091513492373876, + "grad_norm": 23.39924322554341, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 306653664, + "step": 3162 + }, + { + "epoch": 0.3091513492373876, + "loss": 0.13124124705791473, + "loss_ce": 0.0070346929132938385, + "loss_iou": 0.2890625, + "loss_num": 0.02490234375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 306653664, + "step": 3162 + }, + { + "epoch": 0.3092491200625733, + "grad_norm": 22.9557992347865, + "learning_rate": 5e-05, + "loss": 0.0892, + "num_input_tokens_seen": 306751092, + "step": 3163 + }, + { + "epoch": 0.3092491200625733, + "loss": 0.06780973076820374, + "loss_ce": 0.0035854848101735115, + "loss_iou": 0.369140625, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 306751092, + "step": 3163 + }, + { + "epoch": 0.3093468908877591, + "grad_norm": 5.3273663684578265, + "learning_rate": 5e-05, + "loss": 0.1036, + "num_input_tokens_seen": 306848860, + "step": 3164 + }, + { + "epoch": 0.3093468908877591, + "loss": 0.10766489803791046, + "loss_ce": 0.005995588377118111, + "loss_iou": 0.296875, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 306848860, + "step": 3164 + }, + { + "epoch": 0.3094446617129449, + "grad_norm": 5.888967698228106, + "learning_rate": 5e-05, + "loss": 0.0971, + "num_input_tokens_seen": 306946016, + "step": 3165 + }, + { + "epoch": 0.3094446617129449, + "loss": 0.09205392748117447, + "loss_ce": 0.00831368938088417, + "loss_iou": 0.419921875, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 306946016, + "step": 3165 + }, + { + "epoch": 0.30954243253813063, + "grad_norm": 8.299180785390085, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 307042932, + "step": 3166 + }, + { + "epoch": 0.30954243253813063, + "loss": 0.07005497813224792, + "loss_ce": 0.005739185959100723, + "loss_iou": 0.3125, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 307042932, + "step": 3166 + }, + { + "epoch": 0.3096402033633164, + "grad_norm": 4.979692033219054, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 307139372, + "step": 3167 + }, + { + "epoch": 0.3096402033633164, + "loss": 0.06729203462600708, + "loss_ce": 0.003845987841486931, + "loss_iou": 0.33984375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 307139372, + "step": 3167 + }, + { + "epoch": 0.30973797418850213, + "grad_norm": 4.040495852871154, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 307236768, + "step": 3168 + }, + { + "epoch": 0.30973797418850213, + "loss": 0.09332401305437088, + "loss_ce": 0.0040753548964858055, + "loss_iou": 0.3203125, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 307236768, + "step": 3168 + }, + { + "epoch": 0.30983574501368794, + "grad_norm": 13.679610206630915, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 307334236, + "step": 3169 + }, + { + "epoch": 0.30983574501368794, + "loss": 0.08998458087444305, + "loss_ce": 0.006000205408781767, + "loss_iou": 0.33203125, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 307334236, + "step": 3169 + }, + { + "epoch": 0.3099335158388737, + "grad_norm": 8.942636503422008, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 307431004, + "step": 3170 + }, + { + "epoch": 0.3099335158388737, + "loss": 0.0848497599363327, + "loss_ce": 0.008227753452956676, + "loss_iou": 0.359375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 307431004, + "step": 3170 + }, + { + "epoch": 0.31003128666405944, + "grad_norm": 4.687046999823848, + "learning_rate": 5e-05, + "loss": 0.0632, + "num_input_tokens_seen": 307526324, + "step": 3171 + }, + { + "epoch": 0.31003128666405944, + "loss": 0.06734655797481537, + "loss_ce": 0.0024967030622065067, + "loss_iou": 0.283203125, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 307526324, + "step": 3171 + }, + { + "epoch": 0.3101290574892452, + "grad_norm": 11.066430037914603, + "learning_rate": 5e-05, + "loss": 0.1046, + "num_input_tokens_seen": 307621792, + "step": 3172 + }, + { + "epoch": 0.3101290574892452, + "loss": 0.11227305978536606, + "loss_ce": 0.0035389328841120005, + "loss_iou": 0.337890625, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 307621792, + "step": 3172 + }, + { + "epoch": 0.310226828314431, + "grad_norm": 18.03842337088187, + "learning_rate": 5e-05, + "loss": 0.097, + "num_input_tokens_seen": 307718388, + "step": 3173 + }, + { + "epoch": 0.310226828314431, + "loss": 0.10515372455120087, + "loss_ce": 0.00395743316039443, + "loss_iou": 0.330078125, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 307718388, + "step": 3173 + }, + { + "epoch": 0.31032459913961674, + "grad_norm": 3.2052825531792966, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 307814808, + "step": 3174 + }, + { + "epoch": 0.31032459913961674, + "loss": 0.07685834169387817, + "loss_ce": 0.002364936051890254, + "loss_iou": 0.34765625, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 307814808, + "step": 3174 + }, + { + "epoch": 0.3104223699648025, + "grad_norm": 12.899979882342635, + "learning_rate": 5e-05, + "loss": 0.0978, + "num_input_tokens_seen": 307912180, + "step": 3175 + }, + { + "epoch": 0.3104223699648025, + "loss": 0.09808078408241272, + "loss_ce": 0.002423438709229231, + "loss_iou": 0.275390625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 307912180, + "step": 3175 + }, + { + "epoch": 0.31052014078998824, + "grad_norm": 5.380251445466511, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 308008840, + "step": 3176 + }, + { + "epoch": 0.31052014078998824, + "loss": 0.04686329886317253, + "loss_ce": 0.0021550487726926804, + "loss_iou": 0.28125, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 308008840, + "step": 3176 + }, + { + "epoch": 0.31061791161517405, + "grad_norm": 15.423480316297109, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 308105176, + "step": 3177 + }, + { + "epoch": 0.31061791161517405, + "loss": 0.10452204197645187, + "loss_ce": 0.00969629269093275, + "loss_iou": 0.265625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 308105176, + "step": 3177 + }, + { + "epoch": 0.3107156824403598, + "grad_norm": 2.306684570318859, + "learning_rate": 5e-05, + "loss": 0.1037, + "num_input_tokens_seen": 308202356, + "step": 3178 + }, + { + "epoch": 0.3107156824403598, + "loss": 0.0913873091340065, + "loss_ce": 0.006472147069871426, + "loss_iou": 0.404296875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 308202356, + "step": 3178 + }, + { + "epoch": 0.31081345326554555, + "grad_norm": 11.259692037891746, + "learning_rate": 5e-05, + "loss": 0.0546, + "num_input_tokens_seen": 308298532, + "step": 3179 + }, + { + "epoch": 0.31081345326554555, + "loss": 0.06828654557466507, + "loss_ce": 0.004657398909330368, + "loss_iou": 0.3671875, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 308298532, + "step": 3179 + }, + { + "epoch": 0.31091122409073135, + "grad_norm": 3.385403586799473, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 308395140, + "step": 3180 + }, + { + "epoch": 0.31091122409073135, + "loss": 0.0628104954957962, + "loss_ce": 0.0022025827784091234, + "loss_iou": 0.291015625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 308395140, + "step": 3180 + }, + { + "epoch": 0.3110089949159171, + "grad_norm": 20.403204807387624, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 308492356, + "step": 3181 + }, + { + "epoch": 0.3110089949159171, + "loss": 0.051950789988040924, + "loss_ce": 0.005121567286550999, + "loss_iou": 0.337890625, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 308492356, + "step": 3181 + }, + { + "epoch": 0.31110676574110285, + "grad_norm": 16.75483456284088, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 308589472, + "step": 3182 + }, + { + "epoch": 0.31110676574110285, + "loss": 0.08458670228719711, + "loss_ce": 0.0014873379841446877, + "loss_iou": 0.439453125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 308589472, + "step": 3182 + }, + { + "epoch": 0.3112045365662886, + "grad_norm": 14.954422226213023, + "learning_rate": 5e-05, + "loss": 0.1219, + "num_input_tokens_seen": 308686572, + "step": 3183 + }, + { + "epoch": 0.3112045365662886, + "loss": 0.09800649434328079, + "loss_ce": 0.006026506889611483, + "loss_iou": 0.404296875, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 308686572, + "step": 3183 + }, + { + "epoch": 0.3113023073914744, + "grad_norm": 3.9536027224033923, + "learning_rate": 5e-05, + "loss": 0.1158, + "num_input_tokens_seen": 308783212, + "step": 3184 + }, + { + "epoch": 0.3113023073914744, + "loss": 0.12023542076349258, + "loss_ce": 0.008655522018671036, + "loss_iou": 0.31640625, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 308783212, + "step": 3184 + }, + { + "epoch": 0.31140007821666016, + "grad_norm": 3.516921285673469, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 308879412, + "step": 3185 + }, + { + "epoch": 0.31140007821666016, + "loss": 0.03905712813138962, + "loss_ce": 0.001711239805445075, + "loss_iou": 0.259765625, + "loss_num": 0.007476806640625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 308879412, + "step": 3185 + }, + { + "epoch": 0.3114978490418459, + "grad_norm": 3.4800932057585254, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 308975836, + "step": 3186 + }, + { + "epoch": 0.3114978490418459, + "loss": 0.08703464269638062, + "loss_ce": 0.012541240081191063, + "loss_iou": 0.2236328125, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 308975836, + "step": 3186 + }, + { + "epoch": 0.31159561986703166, + "grad_norm": 6.6356924486420965, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 309073308, + "step": 3187 + }, + { + "epoch": 0.31159561986703166, + "loss": 0.07030418515205383, + "loss_ce": 0.005896838381886482, + "loss_iou": 0.2294921875, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 309073308, + "step": 3187 + }, + { + "epoch": 0.31169339069221746, + "grad_norm": 9.27750368786209, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 309170136, + "step": 3188 + }, + { + "epoch": 0.31169339069221746, + "loss": 0.06594493240118027, + "loss_ce": 0.0031702728010714054, + "loss_iou": 0.400390625, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 309170136, + "step": 3188 + }, + { + "epoch": 0.3117911615174032, + "grad_norm": 29.671038760022522, + "learning_rate": 5e-05, + "loss": 0.1033, + "num_input_tokens_seen": 309267772, + "step": 3189 + }, + { + "epoch": 0.3117911615174032, + "loss": 0.1065678596496582, + "loss_ce": 0.004578119143843651, + "loss_iou": 0.3515625, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 309267772, + "step": 3189 + }, + { + "epoch": 0.31188893234258896, + "grad_norm": 31.487403904050968, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 309364840, + "step": 3190 + }, + { + "epoch": 0.31188893234258896, + "loss": 0.07306435704231262, + "loss_ce": 0.002858672756701708, + "loss_iou": 0.29296875, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 309364840, + "step": 3190 + }, + { + "epoch": 0.3119867031677747, + "grad_norm": 17.529745331619424, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 309461960, + "step": 3191 + }, + { + "epoch": 0.3119867031677747, + "loss": 0.04473366588354111, + "loss_ce": 0.001749653834849596, + "loss_iou": 0.29296875, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 309461960, + "step": 3191 + }, + { + "epoch": 0.3120844739929605, + "grad_norm": 10.88155926971459, + "learning_rate": 5e-05, + "loss": 0.0954, + "num_input_tokens_seen": 309559976, + "step": 3192 + }, + { + "epoch": 0.3120844739929605, + "loss": 0.1017821878194809, + "loss_ce": 0.004553184844553471, + "loss_iou": 0.34375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 309559976, + "step": 3192 + }, + { + "epoch": 0.31218224481814627, + "grad_norm": 4.156275707526384, + "learning_rate": 5e-05, + "loss": 0.0969, + "num_input_tokens_seen": 309656864, + "step": 3193 + }, + { + "epoch": 0.31218224481814627, + "loss": 0.11633459478616714, + "loss_ce": 0.010606445372104645, + "loss_iou": 0.26171875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 309656864, + "step": 3193 + }, + { + "epoch": 0.312280015643332, + "grad_norm": 3.7028318146629373, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 309753832, + "step": 3194 + }, + { + "epoch": 0.312280015643332, + "loss": 0.09394148737192154, + "loss_ce": 0.009850302711129189, + "loss_iou": 0.2373046875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 309753832, + "step": 3194 + }, + { + "epoch": 0.31237778646851777, + "grad_norm": 7.08832065824274, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 309849664, + "step": 3195 + }, + { + "epoch": 0.31237778646851777, + "loss": 0.03964362293481827, + "loss_ce": 0.003617619164288044, + "loss_iou": 0.263671875, + "loss_num": 0.0072021484375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 309849664, + "step": 3195 + }, + { + "epoch": 0.3124755572937036, + "grad_norm": 9.258976569022405, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 309946108, + "step": 3196 + }, + { + "epoch": 0.3124755572937036, + "loss": 0.12028439342975616, + "loss_ce": 0.005080527625977993, + "loss_iou": 0.306640625, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 309946108, + "step": 3196 + }, + { + "epoch": 0.3125733281188893, + "grad_norm": 25.46024865385751, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 310042600, + "step": 3197 + }, + { + "epoch": 0.3125733281188893, + "loss": 0.08597606420516968, + "loss_ce": 0.010155141353607178, + "loss_iou": 0.37109375, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 310042600, + "step": 3197 + }, + { + "epoch": 0.3126710989440751, + "grad_norm": 20.079100220675496, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 310138908, + "step": 3198 + }, + { + "epoch": 0.3126710989440751, + "loss": 0.12045706808567047, + "loss_ce": 0.00749625451862812, + "loss_iou": 0.283203125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 310138908, + "step": 3198 + }, + { + "epoch": 0.3127688697692608, + "grad_norm": 5.9627622022067746, + "learning_rate": 5e-05, + "loss": 0.0937, + "num_input_tokens_seen": 310236496, + "step": 3199 + }, + { + "epoch": 0.3127688697692608, + "loss": 0.07144489139318466, + "loss_ce": 0.003650091588497162, + "loss_iou": 0.35546875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 310236496, + "step": 3199 + }, + { + "epoch": 0.31286664059444663, + "grad_norm": 18.95726332610839, + "learning_rate": 5e-05, + "loss": 0.0997, + "num_input_tokens_seen": 310333408, + "step": 3200 + }, + { + "epoch": 0.31286664059444663, + "loss": 0.11418680846691132, + "loss_ce": 0.0029807593673467636, + "loss_iou": 0.310546875, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 310333408, + "step": 3200 + }, + { + "epoch": 0.3129644114196324, + "grad_norm": 41.91709728359512, + "learning_rate": 5e-05, + "loss": 0.1088, + "num_input_tokens_seen": 310429564, + "step": 3201 + }, + { + "epoch": 0.3129644114196324, + "loss": 0.12504154443740845, + "loss_ce": 0.004970137495547533, + "loss_iou": 0.380859375, + "loss_num": 0.02392578125, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 310429564, + "step": 3201 + }, + { + "epoch": 0.31306218224481813, + "grad_norm": 5.803576591552748, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 310525668, + "step": 3202 + }, + { + "epoch": 0.31306218224481813, + "loss": 0.09076234698295593, + "loss_ce": 0.002953733317553997, + "loss_iou": 0.2060546875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 310525668, + "step": 3202 + }, + { + "epoch": 0.31315995307000394, + "grad_norm": 4.50226462546494, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 310622932, + "step": 3203 + }, + { + "epoch": 0.31315995307000394, + "loss": 0.07184508442878723, + "loss_ce": 0.0027990604285150766, + "loss_iou": 0.263671875, + "loss_num": 0.01385498046875, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 310622932, + "step": 3203 + }, + { + "epoch": 0.3132577238951897, + "grad_norm": 7.609929981985186, + "learning_rate": 5e-05, + "loss": 0.1168, + "num_input_tokens_seen": 310719924, + "step": 3204 + }, + { + "epoch": 0.3132577238951897, + "loss": 0.12542639672756195, + "loss_ce": 0.0128470528870821, + "loss_iou": 0.2333984375, + "loss_num": 0.0224609375, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 310719924, + "step": 3204 + }, + { + "epoch": 0.31335549472037544, + "grad_norm": 3.6092317446987146, + "learning_rate": 5e-05, + "loss": 0.1131, + "num_input_tokens_seen": 310816316, + "step": 3205 + }, + { + "epoch": 0.31335549472037544, + "loss": 0.1348058134317398, + "loss_ce": 0.0071965549141168594, + "loss_iou": 0.36328125, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 310816316, + "step": 3205 + }, + { + "epoch": 0.3134532655455612, + "grad_norm": 16.271535951382358, + "learning_rate": 5e-05, + "loss": 0.0854, + "num_input_tokens_seen": 310912892, + "step": 3206 + }, + { + "epoch": 0.3134532655455612, + "loss": 0.08326072245836258, + "loss_ce": 0.004777135327458382, + "loss_iou": 0.298828125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 310912892, + "step": 3206 + }, + { + "epoch": 0.313551036370747, + "grad_norm": 19.915974362884885, + "learning_rate": 5e-05, + "loss": 0.096, + "num_input_tokens_seen": 311010016, + "step": 3207 + }, + { + "epoch": 0.313551036370747, + "loss": 0.10148058831691742, + "loss_ce": 0.0055638328194618225, + "loss_iou": 0.34375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 311010016, + "step": 3207 + }, + { + "epoch": 0.31364880719593274, + "grad_norm": 14.974084647094692, + "learning_rate": 5e-05, + "loss": 0.088, + "num_input_tokens_seen": 311106852, + "step": 3208 + }, + { + "epoch": 0.31364880719593274, + "loss": 0.10524963587522507, + "loss_ce": 0.0030157463625073433, + "loss_iou": 0.3046875, + "loss_num": 0.0205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 311106852, + "step": 3208 + }, + { + "epoch": 0.3137465780211185, + "grad_norm": 8.81983444252494, + "learning_rate": 5e-05, + "loss": 0.0996, + "num_input_tokens_seen": 311204120, + "step": 3209 + }, + { + "epoch": 0.3137465780211185, + "loss": 0.10858301818370819, + "loss_ce": 0.004403635859489441, + "loss_iou": 0.33203125, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 311204120, + "step": 3209 + }, + { + "epoch": 0.31384434884630424, + "grad_norm": 12.828960736609126, + "learning_rate": 5e-05, + "loss": 0.0893, + "num_input_tokens_seen": 311300692, + "step": 3210 + }, + { + "epoch": 0.31384434884630424, + "loss": 0.12313871085643768, + "loss_ce": 0.002304361667484045, + "loss_iou": 0.310546875, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 311300692, + "step": 3210 + }, + { + "epoch": 0.31394211967149005, + "grad_norm": 34.27823481508809, + "learning_rate": 5e-05, + "loss": 0.1156, + "num_input_tokens_seen": 311398192, + "step": 3211 + }, + { + "epoch": 0.31394211967149005, + "loss": 0.11675796657800674, + "loss_ce": 0.010068513453006744, + "loss_iou": 0.369140625, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 311398192, + "step": 3211 + }, + { + "epoch": 0.3140398904966758, + "grad_norm": 7.802495802877765, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 311495148, + "step": 3212 + }, + { + "epoch": 0.3140398904966758, + "loss": 0.07542775571346283, + "loss_ce": 0.002654778305441141, + "loss_iou": 0.380859375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 311495148, + "step": 3212 + }, + { + "epoch": 0.31413766132186155, + "grad_norm": 5.2242509954875835, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 311591452, + "step": 3213 + }, + { + "epoch": 0.31413766132186155, + "loss": 0.06568922102451324, + "loss_ce": 0.007492197677493095, + "loss_iou": 0.337890625, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 311591452, + "step": 3213 + }, + { + "epoch": 0.3142354321470473, + "grad_norm": 17.39276850859715, + "learning_rate": 5e-05, + "loss": 0.0968, + "num_input_tokens_seen": 311688604, + "step": 3214 + }, + { + "epoch": 0.3142354321470473, + "loss": 0.11677772551774979, + "loss_ce": 0.001695939339697361, + "loss_iou": 0.322265625, + "loss_num": 0.02294921875, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 311688604, + "step": 3214 + }, + { + "epoch": 0.3143332029722331, + "grad_norm": 6.089293215855413, + "learning_rate": 5e-05, + "loss": 0.0847, + "num_input_tokens_seen": 311786220, + "step": 3215 + }, + { + "epoch": 0.3143332029722331, + "loss": 0.07005947828292847, + "loss_ce": 0.003317534225061536, + "loss_iou": 0.390625, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 311786220, + "step": 3215 + }, + { + "epoch": 0.31443097379741886, + "grad_norm": 4.360945113703874, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 311883292, + "step": 3216 + }, + { + "epoch": 0.31443097379741886, + "loss": 0.05786097049713135, + "loss_ce": 0.0068165091797709465, + "loss_iou": 0.380859375, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 311883292, + "step": 3216 + }, + { + "epoch": 0.3145287446226046, + "grad_norm": 15.599811946790524, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 311980160, + "step": 3217 + }, + { + "epoch": 0.3145287446226046, + "loss": 0.10241008549928665, + "loss_ce": 0.003700976725667715, + "loss_iou": 0.40625, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 311980160, + "step": 3217 + }, + { + "epoch": 0.31462651544779036, + "grad_norm": 17.540220693431372, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 312077884, + "step": 3218 + }, + { + "epoch": 0.31462651544779036, + "loss": 0.10852174460887909, + "loss_ce": 0.004700935445725918, + "loss_iou": 0.451171875, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 312077884, + "step": 3218 + }, + { + "epoch": 0.31472428627297616, + "grad_norm": 3.9436160973494907, + "learning_rate": 5e-05, + "loss": 0.1094, + "num_input_tokens_seen": 312175324, + "step": 3219 + }, + { + "epoch": 0.31472428627297616, + "loss": 0.10789228230714798, + "loss_ce": 0.00950360856950283, + "loss_iou": 0.392578125, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 312175324, + "step": 3219 + }, + { + "epoch": 0.3148220570981619, + "grad_norm": 7.415483030900824, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 312272652, + "step": 3220 + }, + { + "epoch": 0.3148220570981619, + "loss": 0.09305758774280548, + "loss_ce": 0.005670507438480854, + "loss_iou": 0.310546875, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 312272652, + "step": 3220 + }, + { + "epoch": 0.31491982792334766, + "grad_norm": 9.130858842664233, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 312369452, + "step": 3221 + }, + { + "epoch": 0.31491982792334766, + "loss": 0.0659172385931015, + "loss_ce": 0.004607426002621651, + "loss_iou": 0.29296875, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 312369452, + "step": 3221 + }, + { + "epoch": 0.3150175987485334, + "grad_norm": 8.044011411207299, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 312466356, + "step": 3222 + }, + { + "epoch": 0.3150175987485334, + "loss": 0.08311314880847931, + "loss_ce": 0.006437736097723246, + "loss_iou": 0.263671875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 312466356, + "step": 3222 + }, + { + "epoch": 0.3151153695737192, + "grad_norm": 3.4076792989492053, + "learning_rate": 5e-05, + "loss": 0.1106, + "num_input_tokens_seen": 312563148, + "step": 3223 + }, + { + "epoch": 0.3151153695737192, + "loss": 0.11487787961959839, + "loss_ce": 0.006387884728610516, + "loss_iou": 0.29296875, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 312563148, + "step": 3223 + }, + { + "epoch": 0.31521314039890497, + "grad_norm": 8.041538353347052, + "learning_rate": 5e-05, + "loss": 0.108, + "num_input_tokens_seen": 312660180, + "step": 3224 + }, + { + "epoch": 0.31521314039890497, + "loss": 0.1025322824716568, + "loss_ce": 0.0126274973154068, + "loss_iou": 0.287109375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 312660180, + "step": 3224 + }, + { + "epoch": 0.3153109112240907, + "grad_norm": 16.261647224694496, + "learning_rate": 5e-05, + "loss": 0.0584, + "num_input_tokens_seen": 312756848, + "step": 3225 + }, + { + "epoch": 0.3153109112240907, + "loss": 0.07339351624250412, + "loss_ce": 0.006908109877258539, + "loss_iou": 0.2490234375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 312756848, + "step": 3225 + }, + { + "epoch": 0.3154086820492765, + "grad_norm": 2.579741983402479, + "learning_rate": 5e-05, + "loss": 0.088, + "num_input_tokens_seen": 312854044, + "step": 3226 + }, + { + "epoch": 0.3154086820492765, + "loss": 0.11477690935134888, + "loss_ce": 0.009460745379328728, + "loss_iou": 0.330078125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 312854044, + "step": 3226 + }, + { + "epoch": 0.3155064528744623, + "grad_norm": 8.549020912694544, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 312951480, + "step": 3227 + }, + { + "epoch": 0.3155064528744623, + "loss": 0.07227319478988647, + "loss_ce": 0.008651670068502426, + "loss_iou": 0.283203125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 312951480, + "step": 3227 + }, + { + "epoch": 0.315604223699648, + "grad_norm": 6.195655900839922, + "learning_rate": 5e-05, + "loss": 0.0985, + "num_input_tokens_seen": 313047928, + "step": 3228 + }, + { + "epoch": 0.315604223699648, + "loss": 0.08900775015354156, + "loss_ce": 0.004504580982029438, + "loss_iou": 0.43359375, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 313047928, + "step": 3228 + }, + { + "epoch": 0.3157019945248338, + "grad_norm": 40.15509600800118, + "learning_rate": 5e-05, + "loss": 0.1345, + "num_input_tokens_seen": 313145152, + "step": 3229 + }, + { + "epoch": 0.3157019945248338, + "loss": 0.15026873350143433, + "loss_ce": 0.009056271985173225, + "loss_iou": 0.35546875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 313145152, + "step": 3229 + }, + { + "epoch": 0.3157997653500196, + "grad_norm": 42.35552220757065, + "learning_rate": 5e-05, + "loss": 0.1126, + "num_input_tokens_seen": 313242200, + "step": 3230 + }, + { + "epoch": 0.3157997653500196, + "loss": 0.13385626673698425, + "loss_ce": 0.007605045568197966, + "loss_iou": 0.44140625, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 313242200, + "step": 3230 + }, + { + "epoch": 0.31589753617520533, + "grad_norm": 5.1988642901345425, + "learning_rate": 5e-05, + "loss": 0.1214, + "num_input_tokens_seen": 313338956, + "step": 3231 + }, + { + "epoch": 0.31589753617520533, + "loss": 0.144610196352005, + "loss_ce": 0.003313823137432337, + "loss_iou": 0.35546875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 313338956, + "step": 3231 + }, + { + "epoch": 0.3159953070003911, + "grad_norm": 12.358301100846573, + "learning_rate": 5e-05, + "loss": 0.0607, + "num_input_tokens_seen": 313437120, + "step": 3232 + }, + { + "epoch": 0.3159953070003911, + "loss": 0.06125468760728836, + "loss_ce": 0.0041257780976593494, + "loss_iou": 0.3515625, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 313437120, + "step": 3232 + }, + { + "epoch": 0.31609307782557683, + "grad_norm": 30.261579977423935, + "learning_rate": 5e-05, + "loss": 0.1166, + "num_input_tokens_seen": 313534384, + "step": 3233 + }, + { + "epoch": 0.31609307782557683, + "loss": 0.0896366536617279, + "loss_ce": 0.0022190529853105545, + "loss_iou": 0.345703125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 313534384, + "step": 3233 + }, + { + "epoch": 0.31619084865076263, + "grad_norm": 4.513931537954566, + "learning_rate": 5e-05, + "loss": 0.1002, + "num_input_tokens_seen": 313630484, + "step": 3234 + }, + { + "epoch": 0.31619084865076263, + "loss": 0.09823314845561981, + "loss_ce": 0.0033539982978254557, + "loss_iou": 0.291015625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 313630484, + "step": 3234 + }, + { + "epoch": 0.3162886194759484, + "grad_norm": 2.7351782242710327, + "learning_rate": 5e-05, + "loss": 0.1117, + "num_input_tokens_seen": 313727360, + "step": 3235 + }, + { + "epoch": 0.3162886194759484, + "loss": 0.12733663618564606, + "loss_ce": 0.0047322651371359825, + "loss_iou": 0.24609375, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 313727360, + "step": 3235 + }, + { + "epoch": 0.31638639030113413, + "grad_norm": 9.003986254253014, + "learning_rate": 5e-05, + "loss": 0.1159, + "num_input_tokens_seen": 313823872, + "step": 3236 + }, + { + "epoch": 0.31638639030113413, + "loss": 0.12568189203739166, + "loss_ce": 0.0065107448026537895, + "loss_iou": 0.30078125, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 313823872, + "step": 3236 + }, + { + "epoch": 0.3164841611263199, + "grad_norm": 6.46395098126792, + "learning_rate": 5e-05, + "loss": 0.123, + "num_input_tokens_seen": 313921804, + "step": 3237 + }, + { + "epoch": 0.3164841611263199, + "loss": 0.13723430037498474, + "loss_ce": 0.007290451321750879, + "loss_iou": 0.373046875, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 313921804, + "step": 3237 + }, + { + "epoch": 0.3165819319515057, + "grad_norm": 7.818789564535002, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 314019080, + "step": 3238 + }, + { + "epoch": 0.3165819319515057, + "loss": 0.08992907404899597, + "loss_ce": 0.0035872124135494232, + "loss_iou": 0.3203125, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 314019080, + "step": 3238 + }, + { + "epoch": 0.31667970277669144, + "grad_norm": 2.1205978161886505, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 314116084, + "step": 3239 + }, + { + "epoch": 0.31667970277669144, + "loss": 0.09505969285964966, + "loss_ce": 0.009015381336212158, + "loss_iou": 0.287109375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 314116084, + "step": 3239 + }, + { + "epoch": 0.3167774736018772, + "grad_norm": 2.397982125357076, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 314213200, + "step": 3240 + }, + { + "epoch": 0.3167774736018772, + "loss": 0.06315557658672333, + "loss_ce": 0.004287172108888626, + "loss_iou": 0.291015625, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 314213200, + "step": 3240 + }, + { + "epoch": 0.31687524442706294, + "grad_norm": 2.3561172528131724, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 314310564, + "step": 3241 + }, + { + "epoch": 0.31687524442706294, + "loss": 0.0530041940510273, + "loss_ce": 0.007746626622974873, + "loss_iou": 0.353515625, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 314310564, + "step": 3241 + }, + { + "epoch": 0.31697301525224875, + "grad_norm": 5.3405853430725045, + "learning_rate": 5e-05, + "loss": 0.0865, + "num_input_tokens_seen": 314407332, + "step": 3242 + }, + { + "epoch": 0.31697301525224875, + "loss": 0.05941396206617355, + "loss_ce": 0.008617452345788479, + "loss_iou": 0.33984375, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 314407332, + "step": 3242 + }, + { + "epoch": 0.3170707860774345, + "grad_norm": 3.42403996991017, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 314504836, + "step": 3243 + }, + { + "epoch": 0.3170707860774345, + "loss": 0.1113179624080658, + "loss_ce": 0.0047200568951666355, + "loss_iou": 0.380859375, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 314504836, + "step": 3243 + }, + { + "epoch": 0.31716855690262025, + "grad_norm": 3.344226703181608, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 314602144, + "step": 3244 + }, + { + "epoch": 0.31716855690262025, + "loss": 0.0589229092001915, + "loss_ce": 0.003869198262691498, + "loss_iou": 0.470703125, + "loss_num": 0.010986328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 314602144, + "step": 3244 + }, + { + "epoch": 0.317266327727806, + "grad_norm": 3.529443166821118, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 314699936, + "step": 3245 + }, + { + "epoch": 0.317266327727806, + "loss": 0.06029953435063362, + "loss_ce": 0.003750462085008621, + "loss_iou": 0.39453125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 314699936, + "step": 3245 + }, + { + "epoch": 0.3173640985529918, + "grad_norm": 11.588642564299711, + "learning_rate": 5e-05, + "loss": 0.0921, + "num_input_tokens_seen": 314796204, + "step": 3246 + }, + { + "epoch": 0.3173640985529918, + "loss": 0.1019328162074089, + "loss_ce": 0.006084735970944166, + "loss_iou": 0.1572265625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 314796204, + "step": 3246 + }, + { + "epoch": 0.31746186937817755, + "grad_norm": 3.95579876540588, + "learning_rate": 5e-05, + "loss": 0.0983, + "num_input_tokens_seen": 314892800, + "step": 3247 + }, + { + "epoch": 0.31746186937817755, + "loss": 0.08963656425476074, + "loss_ce": 0.008856533095240593, + "loss_iou": 0.353515625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 314892800, + "step": 3247 + }, + { + "epoch": 0.3175596402033633, + "grad_norm": 3.8559339728155324, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 314989828, + "step": 3248 + }, + { + "epoch": 0.3175596402033633, + "loss": 0.11243203282356262, + "loss_ce": 0.0029921820387244225, + "loss_iou": 0.232421875, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 314989828, + "step": 3248 + }, + { + "epoch": 0.3176574110285491, + "grad_norm": 4.360226821221218, + "learning_rate": 5e-05, + "loss": 0.0502, + "num_input_tokens_seen": 315086140, + "step": 3249 + }, + { + "epoch": 0.3176574110285491, + "loss": 0.03896500542759895, + "loss_ce": 0.0063417162746191025, + "loss_iou": 0.27734375, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 315086140, + "step": 3249 + }, + { + "epoch": 0.31775518185373486, + "grad_norm": 3.215364122576032, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 315183056, + "step": 3250 + }, + { + "epoch": 0.31775518185373486, + "eval_seeclick_CIoU": 0.47299252450466156, + "eval_seeclick_GIoU": 0.4740593284368515, + "eval_seeclick_IoU": 0.5258722007274628, + "eval_seeclick_MAE_all": 0.09172597154974937, + "eval_seeclick_MAE_h": 0.04961076006293297, + "eval_seeclick_MAE_w": 0.12737275660037994, + "eval_seeclick_MAE_x": 0.1425490379333496, + "eval_seeclick_MAE_y": 0.047371331602334976, + "eval_seeclick_NUM_probability": 0.9999872744083405, + "eval_seeclick_inside_bbox": 0.7684659063816071, + "eval_seeclick_loss": 0.32662084698677063, + "eval_seeclick_loss_ce": 0.010470373556017876, + "eval_seeclick_loss_iou": 0.4267578125, + "eval_seeclick_loss_num": 0.0639801025390625, + "eval_seeclick_loss_xval": 0.31964111328125, + "eval_seeclick_runtime": 73.499, + "eval_seeclick_samples_per_second": 0.585, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 315183056, + "step": 3250 + }, + { + "epoch": 0.31775518185373486, + "eval_icons_CIoU": 0.6435346305370331, + "eval_icons_GIoU": 0.6431714296340942, + "eval_icons_IoU": 0.6759227514266968, + "eval_icons_MAE_all": 0.05822799168527126, + "eval_icons_MAE_h": 0.06280772760510445, + "eval_icons_MAE_w": 0.05552307143807411, + "eval_icons_MAE_x": 0.05357634648680687, + "eval_icons_MAE_y": 0.06100480258464813, + "eval_icons_NUM_probability": 0.9999864995479584, + "eval_icons_inside_bbox": 0.7482638955116272, + "eval_icons_loss": 0.18302489817142487, + "eval_icons_loss_ce": 1.8427562281431165e-05, + "eval_icons_loss_iou": 0.3533935546875, + "eval_icons_loss_num": 0.038265228271484375, + "eval_icons_loss_xval": 0.191253662109375, + "eval_icons_runtime": 90.2214, + "eval_icons_samples_per_second": 0.554, + "eval_icons_steps_per_second": 0.022, + "num_input_tokens_seen": 315183056, + "step": 3250 + }, + { + "epoch": 0.31775518185373486, + "eval_screenspot_CIoU": 0.31754059592882794, + "eval_screenspot_GIoU": 0.28942766785621643, + "eval_screenspot_IoU": 0.39884666601816815, + "eval_screenspot_MAE_all": 0.16085790594418845, + "eval_screenspot_MAE_h": 0.12756236642599106, + "eval_screenspot_MAE_w": 0.20642825961112976, + "eval_screenspot_MAE_x": 0.17723717292149863, + "eval_screenspot_MAE_y": 0.13220379501581192, + "eval_screenspot_NUM_probability": 0.9999721447626749, + "eval_screenspot_inside_bbox": 0.606250007947286, + "eval_screenspot_loss": 0.5689038038253784, + "eval_screenspot_loss_ce": 0.01846614417930444, + "eval_screenspot_loss_iou": 0.3453776041666667, + "eval_screenspot_loss_num": 0.10906982421875, + "eval_screenspot_loss_xval": 0.5450439453125, + "eval_screenspot_runtime": 153.2076, + "eval_screenspot_samples_per_second": 0.581, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 315183056, + "step": 3250 + }, + { + "epoch": 0.31775518185373486, + "eval_compot_CIoU": 0.45241579413414, + "eval_compot_GIoU": 0.4511125087738037, + "eval_compot_IoU": 0.50737464427948, + "eval_compot_MAE_all": 0.08848259970545769, + "eval_compot_MAE_h": 0.0786395687609911, + "eval_compot_MAE_w": 0.09972351789474487, + "eval_compot_MAE_x": 0.0979236550629139, + "eval_compot_MAE_y": 0.07764366641640663, + "eval_compot_NUM_probability": 0.9999779164791107, + "eval_compot_inside_bbox": 0.6770833432674408, + "eval_compot_loss": 0.2905006408691406, + "eval_compot_loss_ce": 0.015137756243348122, + "eval_compot_loss_iou": 0.51861572265625, + "eval_compot_loss_num": 0.048061370849609375, + "eval_compot_loss_xval": 0.24017333984375, + "eval_compot_runtime": 93.3473, + "eval_compot_samples_per_second": 0.536, + "eval_compot_steps_per_second": 0.021, + "num_input_tokens_seen": 315183056, + "step": 3250 + }, + { + "epoch": 0.31775518185373486, + "loss": 0.22142432630062103, + "loss_ce": 0.015552746132016182, + "loss_iou": 0.5546875, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 315183056, + "step": 3250 + }, + { + "epoch": 0.3178529526789206, + "grad_norm": 3.086667970264982, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 315279452, + "step": 3251 + }, + { + "epoch": 0.3178529526789206, + "loss": 0.07433082163333893, + "loss_ce": 0.008824840188026428, + "loss_iou": 0.333984375, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 315279452, + "step": 3251 + }, + { + "epoch": 0.31795072350410636, + "grad_norm": 4.451748743797729, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 315376724, + "step": 3252 + }, + { + "epoch": 0.31795072350410636, + "loss": 0.06840170174837112, + "loss_ce": 0.0051387581042945385, + "loss_iou": 0.35546875, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 315376724, + "step": 3252 + }, + { + "epoch": 0.31804849432929216, + "grad_norm": 19.92831421821782, + "learning_rate": 5e-05, + "loss": 0.1248, + "num_input_tokens_seen": 315473672, + "step": 3253 + }, + { + "epoch": 0.31804849432929216, + "loss": 0.13200300931930542, + "loss_ce": 0.004927813075482845, + "loss_iou": 0.44921875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 315473672, + "step": 3253 + }, + { + "epoch": 0.3181462651544779, + "grad_norm": 15.236741059235328, + "learning_rate": 5e-05, + "loss": 0.0938, + "num_input_tokens_seen": 315571100, + "step": 3254 + }, + { + "epoch": 0.3181462651544779, + "loss": 0.11765260994434357, + "loss_ce": 0.006538107059895992, + "loss_iou": 0.40234375, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 315571100, + "step": 3254 + }, + { + "epoch": 0.31824403597966366, + "grad_norm": 21.585618752108637, + "learning_rate": 5e-05, + "loss": 0.124, + "num_input_tokens_seen": 315668652, + "step": 3255 + }, + { + "epoch": 0.31824403597966366, + "loss": 0.1608467698097229, + "loss_ce": 0.00612263847142458, + "loss_iou": 0.31640625, + "loss_num": 0.031005859375, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 315668652, + "step": 3255 + }, + { + "epoch": 0.3183418068048494, + "grad_norm": 16.120211925450366, + "learning_rate": 5e-05, + "loss": 0.1327, + "num_input_tokens_seen": 315766108, + "step": 3256 + }, + { + "epoch": 0.3183418068048494, + "loss": 0.16096460819244385, + "loss_ce": 0.0031582098454236984, + "loss_iou": 0.380859375, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 315766108, + "step": 3256 + }, + { + "epoch": 0.3184395776300352, + "grad_norm": 12.312353882349878, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 315863176, + "step": 3257 + }, + { + "epoch": 0.3184395776300352, + "loss": 0.06025787442922592, + "loss_ce": 0.002594906371086836, + "loss_iou": 0.306640625, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 315863176, + "step": 3257 + }, + { + "epoch": 0.31853734845522097, + "grad_norm": 29.773641615365115, + "learning_rate": 5e-05, + "loss": 0.1151, + "num_input_tokens_seen": 315961120, + "step": 3258 + }, + { + "epoch": 0.31853734845522097, + "loss": 0.09791182726621628, + "loss_ce": 0.0036430303007364273, + "loss_iou": 0.37890625, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 315961120, + "step": 3258 + }, + { + "epoch": 0.3186351192804067, + "grad_norm": 4.0930196609583405, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 316058100, + "step": 3259 + }, + { + "epoch": 0.3186351192804067, + "loss": 0.11938224732875824, + "loss_ce": 0.006696091964840889, + "loss_iou": 0.412109375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 316058100, + "step": 3259 + }, + { + "epoch": 0.31873289010559247, + "grad_norm": 10.201600737193356, + "learning_rate": 5e-05, + "loss": 0.1223, + "num_input_tokens_seen": 316154112, + "step": 3260 + }, + { + "epoch": 0.31873289010559247, + "loss": 0.07671955972909927, + "loss_ce": 0.007627760525792837, + "loss_iou": 0.267578125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 316154112, + "step": 3260 + }, + { + "epoch": 0.3188306609307783, + "grad_norm": 3.7515316920766564, + "learning_rate": 5e-05, + "loss": 0.0893, + "num_input_tokens_seen": 316250512, + "step": 3261 + }, + { + "epoch": 0.3188306609307783, + "loss": 0.11689393222332001, + "loss_ce": 0.010341803543269634, + "loss_iou": 0.30078125, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 316250512, + "step": 3261 + }, + { + "epoch": 0.318928431755964, + "grad_norm": 7.590470940526565, + "learning_rate": 5e-05, + "loss": 0.0847, + "num_input_tokens_seen": 316347596, + "step": 3262 + }, + { + "epoch": 0.318928431755964, + "loss": 0.08697699010372162, + "loss_ce": 0.007783879525959492, + "loss_iou": 0.310546875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 316347596, + "step": 3262 + }, + { + "epoch": 0.3190262025811498, + "grad_norm": 2.6975636414492095, + "learning_rate": 5e-05, + "loss": 0.1021, + "num_input_tokens_seen": 316444940, + "step": 3263 + }, + { + "epoch": 0.3190262025811498, + "loss": 0.0988948866724968, + "loss_ce": 0.007174305152148008, + "loss_iou": 0.2734375, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 316444940, + "step": 3263 + }, + { + "epoch": 0.3191239734063355, + "grad_norm": 4.379092002720627, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 316541828, + "step": 3264 + }, + { + "epoch": 0.3191239734063355, + "loss": 0.09374164044857025, + "loss_ce": 0.006095150485634804, + "loss_iou": 0.3046875, + "loss_num": 0.0174560546875, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 316541828, + "step": 3264 + }, + { + "epoch": 0.31922174423152133, + "grad_norm": 2.9290795441621538, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 316639904, + "step": 3265 + }, + { + "epoch": 0.31922174423152133, + "loss": 0.057481709867715836, + "loss_ce": 0.004342977423220873, + "loss_iou": 0.265625, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 316639904, + "step": 3265 + }, + { + "epoch": 0.3193195150567071, + "grad_norm": 9.83096620231238, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 316735620, + "step": 3266 + }, + { + "epoch": 0.3193195150567071, + "loss": 0.11830179393291473, + "loss_ce": 0.0024418041575700045, + "loss_iou": 0.287109375, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 316735620, + "step": 3266 + }, + { + "epoch": 0.31941728588189283, + "grad_norm": 7.252723365637439, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 316832728, + "step": 3267 + }, + { + "epoch": 0.31941728588189283, + "loss": 0.09769377112388611, + "loss_ce": 0.004096359480172396, + "loss_iou": 0.287109375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 316832728, + "step": 3267 + }, + { + "epoch": 0.3195150567070786, + "grad_norm": 17.8041055745415, + "learning_rate": 5e-05, + "loss": 0.0846, + "num_input_tokens_seen": 316931344, + "step": 3268 + }, + { + "epoch": 0.3195150567070786, + "loss": 0.09534891694784164, + "loss_ce": 0.008953655138611794, + "loss_iou": 0.369140625, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 316931344, + "step": 3268 + }, + { + "epoch": 0.3196128275322644, + "grad_norm": 13.57612028251763, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 317028144, + "step": 3269 + }, + { + "epoch": 0.3196128275322644, + "loss": 0.08293090760707855, + "loss_ce": 0.006270754151046276, + "loss_iou": 0.408203125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 317028144, + "step": 3269 + }, + { + "epoch": 0.31971059835745014, + "grad_norm": 8.33743864300471, + "learning_rate": 5e-05, + "loss": 0.1047, + "num_input_tokens_seen": 317125128, + "step": 3270 + }, + { + "epoch": 0.31971059835745014, + "loss": 0.1039794534444809, + "loss_ce": 0.004888877272605896, + "loss_iou": 0.44921875, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 317125128, + "step": 3270 + }, + { + "epoch": 0.3198083691826359, + "grad_norm": 7.268409825544274, + "learning_rate": 5e-05, + "loss": 0.0743, + "num_input_tokens_seen": 317221968, + "step": 3271 + }, + { + "epoch": 0.3198083691826359, + "loss": 0.10361167043447495, + "loss_ce": 0.00679465476423502, + "loss_iou": 0.341796875, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 317221968, + "step": 3271 + }, + { + "epoch": 0.3199061400078217, + "grad_norm": 4.711385865611499, + "learning_rate": 5e-05, + "loss": 0.0827, + "num_input_tokens_seen": 317319492, + "step": 3272 + }, + { + "epoch": 0.3199061400078217, + "loss": 0.08160197734832764, + "loss_ce": 0.002103690057992935, + "loss_iou": 0.294921875, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 317319492, + "step": 3272 + }, + { + "epoch": 0.32000391083300744, + "grad_norm": 4.020383781486361, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 317416208, + "step": 3273 + }, + { + "epoch": 0.32000391083300744, + "loss": 0.06420844793319702, + "loss_ce": 0.007155833300203085, + "loss_iou": 0.263671875, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 317416208, + "step": 3273 + }, + { + "epoch": 0.3201016816581932, + "grad_norm": 3.764063212556849, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 317513132, + "step": 3274 + }, + { + "epoch": 0.3201016816581932, + "loss": 0.06997663527727127, + "loss_ce": 0.006042307708412409, + "loss_iou": 0.33203125, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 317513132, + "step": 3274 + }, + { + "epoch": 0.32019945248337894, + "grad_norm": 6.7180886598718095, + "learning_rate": 5e-05, + "loss": 0.0929, + "num_input_tokens_seen": 317609508, + "step": 3275 + }, + { + "epoch": 0.32019945248337894, + "loss": 0.12522244453430176, + "loss_ce": 0.007424592040479183, + "loss_iou": 0.201171875, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 317609508, + "step": 3275 + }, + { + "epoch": 0.32029722330856475, + "grad_norm": 5.540016923454577, + "learning_rate": 5e-05, + "loss": 0.0622, + "num_input_tokens_seen": 317705536, + "step": 3276 + }, + { + "epoch": 0.32029722330856475, + "loss": 0.0749388188123703, + "loss_ce": 0.005389263853430748, + "loss_iou": 0.2001953125, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 317705536, + "step": 3276 + }, + { + "epoch": 0.3203949941337505, + "grad_norm": 4.258087837721792, + "learning_rate": 5e-05, + "loss": 0.101, + "num_input_tokens_seen": 317802228, + "step": 3277 + }, + { + "epoch": 0.3203949941337505, + "loss": 0.13203024864196777, + "loss_ce": 0.004817724693566561, + "loss_iou": 0.265625, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 317802228, + "step": 3277 + }, + { + "epoch": 0.32049276495893625, + "grad_norm": 6.003884469028759, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 317898976, + "step": 3278 + }, + { + "epoch": 0.32049276495893625, + "loss": 0.06122162193059921, + "loss_ce": 0.006656194105744362, + "loss_iou": 0.25390625, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 317898976, + "step": 3278 + }, + { + "epoch": 0.320590535784122, + "grad_norm": 15.242944563294799, + "learning_rate": 5e-05, + "loss": 0.0787, + "num_input_tokens_seen": 317994772, + "step": 3279 + }, + { + "epoch": 0.320590535784122, + "loss": 0.0695701390504837, + "loss_ce": 0.003285961924120784, + "loss_iou": 0.255859375, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 317994772, + "step": 3279 + }, + { + "epoch": 0.3206883066093078, + "grad_norm": 5.083174044481527, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 318092568, + "step": 3280 + }, + { + "epoch": 0.3206883066093078, + "loss": 0.06373926252126694, + "loss_ce": 0.006686653010547161, + "loss_iou": 0.37109375, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 318092568, + "step": 3280 + }, + { + "epoch": 0.32078607743449356, + "grad_norm": 8.75051185362446, + "learning_rate": 5e-05, + "loss": 0.1062, + "num_input_tokens_seen": 318189928, + "step": 3281 + }, + { + "epoch": 0.32078607743449356, + "loss": 0.10117828845977783, + "loss_ce": 0.0032168591860681772, + "loss_iou": 0.349609375, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 318189928, + "step": 3281 + }, + { + "epoch": 0.3208838482596793, + "grad_norm": 13.954156911588191, + "learning_rate": 5e-05, + "loss": 0.0966, + "num_input_tokens_seen": 318287412, + "step": 3282 + }, + { + "epoch": 0.3208838482596793, + "loss": 0.09902699291706085, + "loss_ce": 0.008275349624454975, + "loss_iou": 0.31640625, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 318287412, + "step": 3282 + }, + { + "epoch": 0.32098161908486506, + "grad_norm": 5.819339587597705, + "learning_rate": 5e-05, + "loss": 0.0743, + "num_input_tokens_seen": 318384144, + "step": 3283 + }, + { + "epoch": 0.32098161908486506, + "loss": 0.0746966078877449, + "loss_ce": 0.004506176337599754, + "loss_iou": 0.392578125, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 318384144, + "step": 3283 + }, + { + "epoch": 0.32107938991005086, + "grad_norm": 4.911701379511644, + "learning_rate": 5e-05, + "loss": 0.1151, + "num_input_tokens_seen": 318481052, + "step": 3284 + }, + { + "epoch": 0.32107938991005086, + "loss": 0.12039411067962646, + "loss_ce": 0.00909650232642889, + "loss_iou": 0.330078125, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 318481052, + "step": 3284 + }, + { + "epoch": 0.3211771607352366, + "grad_norm": 18.498064541142295, + "learning_rate": 5e-05, + "loss": 0.1054, + "num_input_tokens_seen": 318578616, + "step": 3285 + }, + { + "epoch": 0.3211771607352366, + "loss": 0.08160713315010071, + "loss_ce": 0.00769355334341526, + "loss_iou": 0.376953125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 318578616, + "step": 3285 + }, + { + "epoch": 0.32127493156042236, + "grad_norm": 6.1628040222297, + "learning_rate": 5e-05, + "loss": 0.0527, + "num_input_tokens_seen": 318675780, + "step": 3286 + }, + { + "epoch": 0.32127493156042236, + "loss": 0.039998698979616165, + "loss_ce": 0.002507852390408516, + "loss_iou": 0.44921875, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 318675780, + "step": 3286 + }, + { + "epoch": 0.3213727023856081, + "grad_norm": 18.747167956229642, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 318772040, + "step": 3287 + }, + { + "epoch": 0.3213727023856081, + "loss": 0.06090965121984482, + "loss_ce": 0.0008815780747681856, + "loss_iou": 0.3671875, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 318772040, + "step": 3287 + }, + { + "epoch": 0.3214704732107939, + "grad_norm": 13.185674626350423, + "learning_rate": 5e-05, + "loss": 0.0866, + "num_input_tokens_seen": 318869232, + "step": 3288 + }, + { + "epoch": 0.3214704732107939, + "loss": 0.09052326530218124, + "loss_ce": 0.005226634908467531, + "loss_iou": 0.37890625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 318869232, + "step": 3288 + }, + { + "epoch": 0.32156824403597967, + "grad_norm": 9.261155919355723, + "learning_rate": 5e-05, + "loss": 0.1228, + "num_input_tokens_seen": 318965880, + "step": 3289 + }, + { + "epoch": 0.32156824403597967, + "loss": 0.16316181421279907, + "loss_ce": 0.010222965851426125, + "loss_iou": 0.2255859375, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 318965880, + "step": 3289 + }, + { + "epoch": 0.3216660148611654, + "grad_norm": 5.120491058871298, + "learning_rate": 5e-05, + "loss": 0.0949, + "num_input_tokens_seen": 319062656, + "step": 3290 + }, + { + "epoch": 0.3216660148611654, + "loss": 0.0674796849489212, + "loss_ce": 0.0035453636664897203, + "loss_iou": 0.39453125, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 319062656, + "step": 3290 + }, + { + "epoch": 0.32176378568635117, + "grad_norm": 10.94216797186841, + "learning_rate": 5e-05, + "loss": 0.1296, + "num_input_tokens_seen": 319160208, + "step": 3291 + }, + { + "epoch": 0.32176378568635117, + "loss": 0.10228995978832245, + "loss_ce": 0.007700727321207523, + "loss_iou": 0.328125, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 319160208, + "step": 3291 + }, + { + "epoch": 0.321861556511537, + "grad_norm": 5.859029349041892, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 319258208, + "step": 3292 + }, + { + "epoch": 0.321861556511537, + "loss": 0.049832575023174286, + "loss_ce": 0.007825126871466637, + "loss_iou": 0.40234375, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 319258208, + "step": 3292 + }, + { + "epoch": 0.3219593273367227, + "grad_norm": 4.352013412335812, + "learning_rate": 5e-05, + "loss": 0.0871, + "num_input_tokens_seen": 319355092, + "step": 3293 + }, + { + "epoch": 0.3219593273367227, + "loss": 0.07311294972896576, + "loss_ce": 0.005180821754038334, + "loss_iou": 0.4375, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 319355092, + "step": 3293 + }, + { + "epoch": 0.3220570981619085, + "grad_norm": 6.629436776281306, + "learning_rate": 5e-05, + "loss": 0.0982, + "num_input_tokens_seen": 319451620, + "step": 3294 + }, + { + "epoch": 0.3220570981619085, + "loss": 0.08050158619880676, + "loss_ce": 0.0051842061802744865, + "loss_iou": 0.30859375, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 319451620, + "step": 3294 + }, + { + "epoch": 0.3221548689870943, + "grad_norm": 3.549185492432499, + "learning_rate": 5e-05, + "loss": 0.0893, + "num_input_tokens_seen": 319548996, + "step": 3295 + }, + { + "epoch": 0.3221548689870943, + "loss": 0.07065875828266144, + "loss_ce": 0.004710265435278416, + "loss_iou": 0.302734375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 319548996, + "step": 3295 + }, + { + "epoch": 0.32225263981228003, + "grad_norm": 5.401342668377451, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 319646528, + "step": 3296 + }, + { + "epoch": 0.32225263981228003, + "loss": 0.09840056300163269, + "loss_ce": 0.004848930519074202, + "loss_iou": 0.3828125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 319646528, + "step": 3296 + }, + { + "epoch": 0.3223504106374658, + "grad_norm": 11.232829645904475, + "learning_rate": 5e-05, + "loss": 0.1293, + "num_input_tokens_seen": 319743812, + "step": 3297 + }, + { + "epoch": 0.3223504106374658, + "loss": 0.13455438613891602, + "loss_ce": 0.010721688158810139, + "loss_iou": 0.35546875, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 319743812, + "step": 3297 + }, + { + "epoch": 0.32244818146265153, + "grad_norm": 11.747569601694313, + "learning_rate": 5e-05, + "loss": 0.078, + "num_input_tokens_seen": 319841540, + "step": 3298 + }, + { + "epoch": 0.32244818146265153, + "loss": 0.07824769616127014, + "loss_ce": 0.004105239175260067, + "loss_iou": 0.404296875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 319841540, + "step": 3298 + }, + { + "epoch": 0.32254595228783733, + "grad_norm": 8.617516062545214, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 319938092, + "step": 3299 + }, + { + "epoch": 0.32254595228783733, + "loss": 0.09211109578609467, + "loss_ce": 0.005967610515654087, + "loss_iou": 0.263671875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 319938092, + "step": 3299 + }, + { + "epoch": 0.3226437231130231, + "grad_norm": 4.20074589052171, + "learning_rate": 5e-05, + "loss": 0.1061, + "num_input_tokens_seen": 320035564, + "step": 3300 + }, + { + "epoch": 0.3226437231130231, + "loss": 0.12726418673992157, + "loss_ce": 0.008184593170881271, + "loss_iou": 0.341796875, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 320035564, + "step": 3300 + }, + { + "epoch": 0.32274149393820883, + "grad_norm": 13.73283442401435, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 320133616, + "step": 3301 + }, + { + "epoch": 0.32274149393820883, + "loss": 0.09008294343948364, + "loss_ce": 0.007548152003437281, + "loss_iou": 0.36328125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 320133616, + "step": 3301 + }, + { + "epoch": 0.3228392647633946, + "grad_norm": 11.332798937921204, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 320231428, + "step": 3302 + }, + { + "epoch": 0.3228392647633946, + "loss": 0.06138928234577179, + "loss_ce": 0.0050385757349431515, + "loss_iou": 0.373046875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 320231428, + "step": 3302 + }, + { + "epoch": 0.3229370355885804, + "grad_norm": 43.2269570102033, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 320328476, + "step": 3303 + }, + { + "epoch": 0.3229370355885804, + "loss": 0.1390647292137146, + "loss_ce": 0.00759501289576292, + "loss_iou": 0.376953125, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 320328476, + "step": 3303 + }, + { + "epoch": 0.32303480641376614, + "grad_norm": 15.23460381460971, + "learning_rate": 5e-05, + "loss": 0.1351, + "num_input_tokens_seen": 320424540, + "step": 3304 + }, + { + "epoch": 0.32303480641376614, + "loss": 0.10228250920772552, + "loss_ce": 0.0029172776266932487, + "loss_iou": 0.328125, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 320424540, + "step": 3304 + }, + { + "epoch": 0.3231325772389519, + "grad_norm": 6.296025597150456, + "learning_rate": 5e-05, + "loss": 0.1096, + "num_input_tokens_seen": 320521420, + "step": 3305 + }, + { + "epoch": 0.3231325772389519, + "loss": 0.11836636066436768, + "loss_ce": 0.008289460092782974, + "loss_iou": 0.3359375, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 320521420, + "step": 3305 + }, + { + "epoch": 0.32323034806413764, + "grad_norm": 5.061296204461963, + "learning_rate": 5e-05, + "loss": 0.1287, + "num_input_tokens_seen": 320617852, + "step": 3306 + }, + { + "epoch": 0.32323034806413764, + "loss": 0.11303824186325073, + "loss_ce": 0.003754799487069249, + "loss_iou": 0.306640625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 320617852, + "step": 3306 + }, + { + "epoch": 0.32332811888932345, + "grad_norm": 8.97498054334034, + "learning_rate": 5e-05, + "loss": 0.1243, + "num_input_tokens_seen": 320713524, + "step": 3307 + }, + { + "epoch": 0.32332811888932345, + "loss": 0.1014261320233345, + "loss_ce": 0.009675035253167152, + "loss_iou": 0.19921875, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 320713524, + "step": 3307 + }, + { + "epoch": 0.3234258897145092, + "grad_norm": 4.969179905785819, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 320809720, + "step": 3308 + }, + { + "epoch": 0.3234258897145092, + "loss": 0.06108291447162628, + "loss_ce": 0.003267360618337989, + "loss_iou": 0.298828125, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 320809720, + "step": 3308 + }, + { + "epoch": 0.32352366053969495, + "grad_norm": 6.421905171863285, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 320905796, + "step": 3309 + }, + { + "epoch": 0.32352366053969495, + "loss": 0.04156983643770218, + "loss_ce": 0.0034037907607853413, + "loss_iou": 0.33203125, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 320905796, + "step": 3309 + }, + { + "epoch": 0.3236214313648807, + "grad_norm": 10.559493529921689, + "learning_rate": 5e-05, + "loss": 0.099, + "num_input_tokens_seen": 321003668, + "step": 3310 + }, + { + "epoch": 0.3236214313648807, + "loss": 0.07374543696641922, + "loss_ce": 0.00717134028673172, + "loss_iou": 0.3515625, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 321003668, + "step": 3310 + }, + { + "epoch": 0.3237192021900665, + "grad_norm": 8.43158013851362, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 321100296, + "step": 3311 + }, + { + "epoch": 0.3237192021900665, + "loss": 0.08762750029563904, + "loss_ce": 0.0037041620817035437, + "loss_iou": 0.484375, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 321100296, + "step": 3311 + }, + { + "epoch": 0.32381697301525225, + "grad_norm": 9.332162648783875, + "learning_rate": 5e-05, + "loss": 0.1114, + "num_input_tokens_seen": 321196320, + "step": 3312 + }, + { + "epoch": 0.32381697301525225, + "loss": 0.08844596147537231, + "loss_ce": 0.010381992906332016, + "loss_iou": 0.408203125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 321196320, + "step": 3312 + }, + { + "epoch": 0.323914743840438, + "grad_norm": 4.275055956896559, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 321293616, + "step": 3313 + }, + { + "epoch": 0.323914743840438, + "loss": 0.059870123863220215, + "loss_ce": 0.006723759230226278, + "loss_iou": 0.333984375, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 321293616, + "step": 3313 + }, + { + "epoch": 0.32401251466562375, + "grad_norm": 3.3705048110956204, + "learning_rate": 5e-05, + "loss": 0.1133, + "num_input_tokens_seen": 321389956, + "step": 3314 + }, + { + "epoch": 0.32401251466562375, + "loss": 0.13372130692005157, + "loss_ce": 0.0013970880536362529, + "loss_iou": 0.3984375, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 321389956, + "step": 3314 + }, + { + "epoch": 0.32411028549080956, + "grad_norm": 12.464827804857965, + "learning_rate": 5e-05, + "loss": 0.0912, + "num_input_tokens_seen": 321486276, + "step": 3315 + }, + { + "epoch": 0.32411028549080956, + "loss": 0.08187663555145264, + "loss_ce": 0.005033367313444614, + "loss_iou": 0.23828125, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 321486276, + "step": 3315 + }, + { + "epoch": 0.3242080563159953, + "grad_norm": 10.679108078144345, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 321582552, + "step": 3316 + }, + { + "epoch": 0.3242080563159953, + "loss": 0.07205305993556976, + "loss_ce": 0.006516557186841965, + "loss_iou": 0.17578125, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 321582552, + "step": 3316 + }, + { + "epoch": 0.32430582714118106, + "grad_norm": 8.602393067076083, + "learning_rate": 5e-05, + "loss": 0.1131, + "num_input_tokens_seen": 321679976, + "step": 3317 + }, + { + "epoch": 0.32430582714118106, + "loss": 0.16361677646636963, + "loss_ce": 0.005108468700200319, + "loss_iou": 0.326171875, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 321679976, + "step": 3317 + }, + { + "epoch": 0.32440359796636686, + "grad_norm": 13.845236466912908, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 321777120, + "step": 3318 + }, + { + "epoch": 0.32440359796636686, + "loss": 0.07394609600305557, + "loss_ce": 0.0035573069471865892, + "loss_iou": 0.326171875, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 321777120, + "step": 3318 + }, + { + "epoch": 0.3245013687915526, + "grad_norm": 18.658696763499968, + "learning_rate": 5e-05, + "loss": 0.1095, + "num_input_tokens_seen": 321874040, + "step": 3319 + }, + { + "epoch": 0.3245013687915526, + "loss": 0.10647815465927124, + "loss_ce": 0.005937992595136166, + "loss_iou": 0.330078125, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 321874040, + "step": 3319 + }, + { + "epoch": 0.32459913961673836, + "grad_norm": 15.227429917563184, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 321971284, + "step": 3320 + }, + { + "epoch": 0.32459913961673836, + "loss": 0.06717240810394287, + "loss_ce": 0.006121993996202946, + "loss_iou": 0.28125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 321971284, + "step": 3320 + }, + { + "epoch": 0.3246969104419241, + "grad_norm": 8.453436701872725, + "learning_rate": 5e-05, + "loss": 0.0701, + "num_input_tokens_seen": 322068732, + "step": 3321 + }, + { + "epoch": 0.3246969104419241, + "loss": 0.10778786242008209, + "loss_ce": 0.004226461052894592, + "loss_iou": 0.234375, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 322068732, + "step": 3321 + }, + { + "epoch": 0.3247946812671099, + "grad_norm": 7.839260867980624, + "learning_rate": 5e-05, + "loss": 0.1055, + "num_input_tokens_seen": 322164888, + "step": 3322 + }, + { + "epoch": 0.3247946812671099, + "loss": 0.08982323855161667, + "loss_ce": 0.006739134434610605, + "loss_iou": 0.291015625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 322164888, + "step": 3322 + }, + { + "epoch": 0.32489245209229567, + "grad_norm": 12.786346940908963, + "learning_rate": 5e-05, + "loss": 0.116, + "num_input_tokens_seen": 322263280, + "step": 3323 + }, + { + "epoch": 0.32489245209229567, + "loss": 0.1490439921617508, + "loss_ce": 0.004116012714803219, + "loss_iou": 0.482421875, + "loss_num": 0.029052734375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 322263280, + "step": 3323 + }, + { + "epoch": 0.3249902229174814, + "grad_norm": 12.0716608218926, + "learning_rate": 5e-05, + "loss": 0.0968, + "num_input_tokens_seen": 322359748, + "step": 3324 + }, + { + "epoch": 0.3249902229174814, + "loss": 0.0997924730181694, + "loss_ce": 0.005699151661247015, + "loss_iou": 0.3203125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 322359748, + "step": 3324 + }, + { + "epoch": 0.32508799374266717, + "grad_norm": 2.919536745011835, + "learning_rate": 5e-05, + "loss": 0.107, + "num_input_tokens_seen": 322456056, + "step": 3325 + }, + { + "epoch": 0.32508799374266717, + "loss": 0.09399928152561188, + "loss_ce": 0.006337537430226803, + "loss_iou": 0.390625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 322456056, + "step": 3325 + }, + { + "epoch": 0.325185764567853, + "grad_norm": 19.167809472903986, + "learning_rate": 5e-05, + "loss": 0.113, + "num_input_tokens_seen": 322552612, + "step": 3326 + }, + { + "epoch": 0.325185764567853, + "loss": 0.12215308845043182, + "loss_ce": 0.004538345616310835, + "loss_iou": 0.3828125, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 322552612, + "step": 3326 + }, + { + "epoch": 0.3252835353930387, + "grad_norm": 6.333405046352871, + "learning_rate": 5e-05, + "loss": 0.0918, + "num_input_tokens_seen": 322649696, + "step": 3327 + }, + { + "epoch": 0.3252835353930387, + "loss": 0.09711376577615738, + "loss_ce": 0.005232967436313629, + "loss_iou": 0.359375, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 322649696, + "step": 3327 + }, + { + "epoch": 0.3253813062182245, + "grad_norm": 3.85660420833482, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 322746232, + "step": 3328 + }, + { + "epoch": 0.3253813062182245, + "loss": 0.06973839551210403, + "loss_ce": 0.004949574358761311, + "loss_iou": 0.353515625, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 322746232, + "step": 3328 + }, + { + "epoch": 0.3254790770434102, + "grad_norm": 8.206764711293621, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 322842548, + "step": 3329 + }, + { + "epoch": 0.3254790770434102, + "loss": 0.05182071402668953, + "loss_ce": 0.004777866415679455, + "loss_iou": 0.2734375, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 322842548, + "step": 3329 + }, + { + "epoch": 0.32557684786859603, + "grad_norm": 17.19827694390296, + "learning_rate": 5e-05, + "loss": 0.088, + "num_input_tokens_seen": 322939100, + "step": 3330 + }, + { + "epoch": 0.32557684786859603, + "loss": 0.09705290198326111, + "loss_ce": 0.005759562831372023, + "loss_iou": 0.375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 322939100, + "step": 3330 + }, + { + "epoch": 0.3256746186937818, + "grad_norm": 20.39164377831414, + "learning_rate": 5e-05, + "loss": 0.1009, + "num_input_tokens_seen": 323035692, + "step": 3331 + }, + { + "epoch": 0.3256746186937818, + "loss": 0.10492992401123047, + "loss_ce": 0.006785394623875618, + "loss_iou": 0.310546875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 323035692, + "step": 3331 + }, + { + "epoch": 0.32577238951896753, + "grad_norm": 2.059334791090694, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 323132168, + "step": 3332 + }, + { + "epoch": 0.32577238951896753, + "loss": 0.050414592027664185, + "loss_ce": 0.005553750786930323, + "loss_iou": 0.244140625, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 323132168, + "step": 3332 + }, + { + "epoch": 0.3258701603441533, + "grad_norm": 6.260042845088206, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 323230184, + "step": 3333 + }, + { + "epoch": 0.3258701603441533, + "loss": 0.06817230582237244, + "loss_ce": 0.002284852322191, + "loss_iou": 0.30078125, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 323230184, + "step": 3333 + }, + { + "epoch": 0.3259679311693391, + "grad_norm": 3.2327192046933875, + "learning_rate": 5e-05, + "loss": 0.0989, + "num_input_tokens_seen": 323326456, + "step": 3334 + }, + { + "epoch": 0.3259679311693391, + "loss": 0.15166780352592468, + "loss_ce": 0.00765534583479166, + "loss_iou": 0.2255859375, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 323326456, + "step": 3334 + }, + { + "epoch": 0.32606570199452484, + "grad_norm": 63.664718227257865, + "learning_rate": 5e-05, + "loss": 0.0975, + "num_input_tokens_seen": 323423404, + "step": 3335 + }, + { + "epoch": 0.32606570199452484, + "loss": 0.07514986395835876, + "loss_ce": 0.005554523319005966, + "loss_iou": 0.2578125, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 323423404, + "step": 3335 + }, + { + "epoch": 0.3261634728197106, + "grad_norm": 142.38335153793045, + "learning_rate": 5e-05, + "loss": 0.1603, + "num_input_tokens_seen": 323520496, + "step": 3336 + }, + { + "epoch": 0.3261634728197106, + "loss": 0.18723700940608978, + "loss_ce": 0.011516791768372059, + "loss_iou": 0.34765625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 323520496, + "step": 3336 + }, + { + "epoch": 0.32626124364489634, + "grad_norm": 16.575158958987995, + "learning_rate": 5e-05, + "loss": 0.1185, + "num_input_tokens_seen": 323618192, + "step": 3337 + }, + { + "epoch": 0.32626124364489634, + "loss": 0.1322813630104065, + "loss_ce": 0.008074820041656494, + "loss_iou": 0.33984375, + "loss_num": 0.02490234375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 323618192, + "step": 3337 + }, + { + "epoch": 0.32635901447008214, + "grad_norm": 3.8701446258004526, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 323715488, + "step": 3338 + }, + { + "epoch": 0.32635901447008214, + "loss": 0.1141965389251709, + "loss_ce": 0.0051572285592556, + "loss_iou": 0.376953125, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 323715488, + "step": 3338 + }, + { + "epoch": 0.3264567852952679, + "grad_norm": 16.972985783857578, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 323812444, + "step": 3339 + }, + { + "epoch": 0.3264567852952679, + "loss": 0.1080179363489151, + "loss_ce": 0.004166618920862675, + "loss_iou": 0.240234375, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 323812444, + "step": 3339 + }, + { + "epoch": 0.32655455612045364, + "grad_norm": 2.362367250675936, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 323908624, + "step": 3340 + }, + { + "epoch": 0.32655455612045364, + "loss": 0.051783040165901184, + "loss_ce": 0.006411035545170307, + "loss_iou": 0.2392578125, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 323908624, + "step": 3340 + }, + { + "epoch": 0.32665232694563945, + "grad_norm": 5.559603717768994, + "learning_rate": 5e-05, + "loss": 0.0856, + "num_input_tokens_seen": 324005040, + "step": 3341 + }, + { + "epoch": 0.32665232694563945, + "loss": 0.10592500865459442, + "loss_ce": 0.012388630770146847, + "loss_iou": 0.236328125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 324005040, + "step": 3341 + }, + { + "epoch": 0.3267500977708252, + "grad_norm": 7.853568389567172, + "learning_rate": 5e-05, + "loss": 0.0536, + "num_input_tokens_seen": 324101468, + "step": 3342 + }, + { + "epoch": 0.3267500977708252, + "loss": 0.06289558857679367, + "loss_ce": 0.007765584159642458, + "loss_iou": 0.296875, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 324101468, + "step": 3342 + }, + { + "epoch": 0.32684786859601095, + "grad_norm": 12.961905500591929, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 324199672, + "step": 3343 + }, + { + "epoch": 0.32684786859601095, + "loss": 0.07978741079568863, + "loss_ce": 0.004164854064583778, + "loss_iou": 0.353515625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 324199672, + "step": 3343 + }, + { + "epoch": 0.3269456394211967, + "grad_norm": 6.450072372566124, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 324296780, + "step": 3344 + }, + { + "epoch": 0.3269456394211967, + "loss": 0.11199869960546494, + "loss_ce": 0.004271651618182659, + "loss_iou": 0.3359375, + "loss_num": 0.021484375, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 324296780, + "step": 3344 + }, + { + "epoch": 0.3270434102463825, + "grad_norm": 13.606323662290626, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 324393376, + "step": 3345 + }, + { + "epoch": 0.3270434102463825, + "loss": 0.07706131786108017, + "loss_ce": 0.004902505781501532, + "loss_iou": 0.3359375, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 324393376, + "step": 3345 + }, + { + "epoch": 0.32714118107156825, + "grad_norm": 5.823570759357454, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 324491836, + "step": 3346 + }, + { + "epoch": 0.32714118107156825, + "loss": 0.06762313842773438, + "loss_ce": 0.0014610213693231344, + "loss_iou": 0.33984375, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 324491836, + "step": 3346 + }, + { + "epoch": 0.327238951896754, + "grad_norm": 6.221650188713973, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 324588388, + "step": 3347 + }, + { + "epoch": 0.327238951896754, + "loss": 0.11502466350793839, + "loss_ce": 0.007351023145020008, + "loss_iou": 0.32421875, + "loss_num": 0.021484375, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 324588388, + "step": 3347 + }, + { + "epoch": 0.32733672272193975, + "grad_norm": 2.462415369296466, + "learning_rate": 5e-05, + "loss": 0.0632, + "num_input_tokens_seen": 324684588, + "step": 3348 + }, + { + "epoch": 0.32733672272193975, + "loss": 0.056722283363342285, + "loss_ce": 0.0075889816507697105, + "loss_iou": 0.228515625, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 324684588, + "step": 3348 + }, + { + "epoch": 0.32743449354712556, + "grad_norm": 8.088836508890635, + "learning_rate": 5e-05, + "loss": 0.1089, + "num_input_tokens_seen": 324781904, + "step": 3349 + }, + { + "epoch": 0.32743449354712556, + "loss": 0.11881144344806671, + "loss_ce": 0.004263711161911488, + "loss_iou": 0.216796875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 324781904, + "step": 3349 + }, + { + "epoch": 0.3275322643723113, + "grad_norm": 5.105890445988728, + "learning_rate": 5e-05, + "loss": 0.075, + "num_input_tokens_seen": 324879352, + "step": 3350 + }, + { + "epoch": 0.3275322643723113, + "loss": 0.05994845926761627, + "loss_ce": 0.001225006184540689, + "loss_iou": 0.3828125, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 324879352, + "step": 3350 + }, + { + "epoch": 0.32763003519749706, + "grad_norm": 6.809880648970021, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 324976892, + "step": 3351 + }, + { + "epoch": 0.32763003519749706, + "loss": 0.12339654564857483, + "loss_ce": 0.008345277048647404, + "loss_iou": 0.37890625, + "loss_num": 0.02294921875, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 324976892, + "step": 3351 + }, + { + "epoch": 0.3277278060226828, + "grad_norm": 3.756462356434371, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 325073740, + "step": 3352 + }, + { + "epoch": 0.3277278060226828, + "loss": 0.08070725202560425, + "loss_ce": 0.00314682861790061, + "loss_iou": 0.2333984375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 325073740, + "step": 3352 + }, + { + "epoch": 0.3278255768478686, + "grad_norm": 2.024028506124502, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 325170252, + "step": 3353 + }, + { + "epoch": 0.3278255768478686, + "loss": 0.05372945964336395, + "loss_ce": 0.005275175906717777, + "loss_iou": 0.30078125, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 325170252, + "step": 3353 + }, + { + "epoch": 0.32792334767305437, + "grad_norm": 7.946985593950964, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 325267276, + "step": 3354 + }, + { + "epoch": 0.32792334767305437, + "loss": 0.07206478714942932, + "loss_ce": 0.003293425776064396, + "loss_iou": 0.2890625, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 325267276, + "step": 3354 + }, + { + "epoch": 0.3280211184982401, + "grad_norm": 5.3500688839731545, + "learning_rate": 5e-05, + "loss": 0.0493, + "num_input_tokens_seen": 325363536, + "step": 3355 + }, + { + "epoch": 0.3280211184982401, + "loss": 0.02967086061835289, + "loss_ce": 0.003685144241899252, + "loss_iou": 0.244140625, + "loss_num": 0.00518798828125, + "loss_xval": 0.0260009765625, + "num_input_tokens_seen": 325363536, + "step": 3355 + }, + { + "epoch": 0.32811888932342587, + "grad_norm": 18.648070176372247, + "learning_rate": 5e-05, + "loss": 0.1216, + "num_input_tokens_seen": 325459976, + "step": 3356 + }, + { + "epoch": 0.32811888932342587, + "loss": 0.1282579004764557, + "loss_ce": 0.004024662543088198, + "loss_iou": 0.1572265625, + "loss_num": 0.02490234375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 325459976, + "step": 3356 + }, + { + "epoch": 0.32821666014861167, + "grad_norm": 6.49383753916291, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 325557360, + "step": 3357 + }, + { + "epoch": 0.32821666014861167, + "loss": 0.07544473558664322, + "loss_ce": 0.0028739357367157936, + "loss_iou": 0.359375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 325557360, + "step": 3357 + }, + { + "epoch": 0.3283144309737974, + "grad_norm": 5.863076648025317, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 325654972, + "step": 3358 + }, + { + "epoch": 0.3283144309737974, + "loss": 0.08090177178382874, + "loss_ce": 0.003875406924635172, + "loss_iou": 0.431640625, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 325654972, + "step": 3358 + }, + { + "epoch": 0.32841220179898317, + "grad_norm": 10.918976767763478, + "learning_rate": 5e-05, + "loss": 0.071, + "num_input_tokens_seen": 325751592, + "step": 3359 + }, + { + "epoch": 0.32841220179898317, + "loss": 0.05455242842435837, + "loss_ce": 0.007906311191618443, + "loss_iou": 0.40234375, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 325751592, + "step": 3359 + }, + { + "epoch": 0.3285099726241689, + "grad_norm": 6.964452523679254, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 325848536, + "step": 3360 + }, + { + "epoch": 0.3285099726241689, + "loss": 0.10178285837173462, + "loss_ce": 0.0022955513559281826, + "loss_iou": 0.236328125, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 325848536, + "step": 3360 + }, + { + "epoch": 0.32860774344935473, + "grad_norm": 4.26483912281709, + "learning_rate": 5e-05, + "loss": 0.102, + "num_input_tokens_seen": 325944092, + "step": 3361 + }, + { + "epoch": 0.32860774344935473, + "loss": 0.1046941876411438, + "loss_ce": 0.010433025658130646, + "loss_iou": 0.30859375, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 325944092, + "step": 3361 + }, + { + "epoch": 0.3287055142745405, + "grad_norm": 5.728679433705732, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 326041388, + "step": 3362 + }, + { + "epoch": 0.3287055142745405, + "loss": 0.07596679031848907, + "loss_ce": 0.0038537499494850636, + "loss_iou": 0.33203125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 326041388, + "step": 3362 + }, + { + "epoch": 0.32880328509972623, + "grad_norm": 5.535942867030601, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 326138360, + "step": 3363 + }, + { + "epoch": 0.32880328509972623, + "loss": 0.05339694023132324, + "loss_ce": 0.004660365171730518, + "loss_iou": 0.35546875, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 326138360, + "step": 3363 + }, + { + "epoch": 0.32890105592491203, + "grad_norm": 5.089810476848345, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 326234968, + "step": 3364 + }, + { + "epoch": 0.32890105592491203, + "loss": 0.08627376705408096, + "loss_ce": 0.006500815041363239, + "loss_iou": 0.3359375, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 326234968, + "step": 3364 + }, + { + "epoch": 0.3289988267500978, + "grad_norm": 6.278314730976522, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 326332868, + "step": 3365 + }, + { + "epoch": 0.3289988267500978, + "loss": 0.08557352423667908, + "loss_ce": 0.005525916814804077, + "loss_iou": 0.3359375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 326332868, + "step": 3365 + }, + { + "epoch": 0.32909659757528353, + "grad_norm": 22.75671655942991, + "learning_rate": 5e-05, + "loss": 0.1497, + "num_input_tokens_seen": 326429276, + "step": 3366 + }, + { + "epoch": 0.32909659757528353, + "loss": 0.20074205100536346, + "loss_ce": 0.009579936042428017, + "loss_iou": 0.259765625, + "loss_num": 0.0380859375, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 326429276, + "step": 3366 + }, + { + "epoch": 0.3291943684004693, + "grad_norm": 8.620767662163138, + "learning_rate": 5e-05, + "loss": 0.1116, + "num_input_tokens_seen": 326527380, + "step": 3367 + }, + { + "epoch": 0.3291943684004693, + "loss": 0.11680483818054199, + "loss_ce": 0.005583523772656918, + "loss_iou": 0.423828125, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 326527380, + "step": 3367 + }, + { + "epoch": 0.3292921392256551, + "grad_norm": 8.04227036722131, + "learning_rate": 5e-05, + "loss": 0.0954, + "num_input_tokens_seen": 326623568, + "step": 3368 + }, + { + "epoch": 0.3292921392256551, + "loss": 0.11518372595310211, + "loss_ce": 0.0019330019131302834, + "loss_iou": 0.28515625, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 326623568, + "step": 3368 + }, + { + "epoch": 0.32938991005084084, + "grad_norm": 14.712510405527953, + "learning_rate": 5e-05, + "loss": 0.0754, + "num_input_tokens_seen": 326720700, + "step": 3369 + }, + { + "epoch": 0.32938991005084084, + "loss": 0.09714116156101227, + "loss_ce": 0.007503402419388294, + "loss_iou": 0.32421875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 326720700, + "step": 3369 + }, + { + "epoch": 0.3294876808760266, + "grad_norm": 10.654996177869661, + "learning_rate": 5e-05, + "loss": 0.1074, + "num_input_tokens_seen": 326817268, + "step": 3370 + }, + { + "epoch": 0.3294876808760266, + "loss": 0.12807680666446686, + "loss_ce": 0.00420595845207572, + "loss_iou": 0.376953125, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 326817268, + "step": 3370 + }, + { + "epoch": 0.32958545170121234, + "grad_norm": 13.926957487553901, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 326915172, + "step": 3371 + }, + { + "epoch": 0.32958545170121234, + "loss": 0.08333078026771545, + "loss_ce": 0.005190521944314241, + "loss_iou": 0.259765625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 326915172, + "step": 3371 + }, + { + "epoch": 0.32968322252639815, + "grad_norm": 15.788459244668193, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 327011184, + "step": 3372 + }, + { + "epoch": 0.32968322252639815, + "loss": 0.07915932685136795, + "loss_ce": 0.002560205524787307, + "loss_iou": 0.328125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 327011184, + "step": 3372 + }, + { + "epoch": 0.3297809933515839, + "grad_norm": 3.6486431244785615, + "learning_rate": 5e-05, + "loss": 0.0578, + "num_input_tokens_seen": 327107532, + "step": 3373 + }, + { + "epoch": 0.3297809933515839, + "loss": 0.05744670704007149, + "loss_ce": 0.004010429605841637, + "loss_iou": 0.25390625, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 327107532, + "step": 3373 + }, + { + "epoch": 0.32987876417676965, + "grad_norm": 5.992347694969039, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 327204732, + "step": 3374 + }, + { + "epoch": 0.32987876417676965, + "loss": 0.06701074540615082, + "loss_ce": 0.003755433950573206, + "loss_iou": 0.28125, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 327204732, + "step": 3374 + }, + { + "epoch": 0.3299765350019554, + "grad_norm": 5.2341260399999925, + "learning_rate": 5e-05, + "loss": 0.078, + "num_input_tokens_seen": 327301688, + "step": 3375 + }, + { + "epoch": 0.3299765350019554, + "loss": 0.09154042601585388, + "loss_ce": 0.0032530769240111113, + "loss_iou": 0.341796875, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 327301688, + "step": 3375 + }, + { + "epoch": 0.3300743058271412, + "grad_norm": 4.044258360161751, + "learning_rate": 5e-05, + "loss": 0.0927, + "num_input_tokens_seen": 327398748, + "step": 3376 + }, + { + "epoch": 0.3300743058271412, + "loss": 0.11174878478050232, + "loss_ce": 0.005333986598998308, + "loss_iou": 0.291015625, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 327398748, + "step": 3376 + }, + { + "epoch": 0.33017207665232695, + "grad_norm": 2.4848474354093812, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 327496404, + "step": 3377 + }, + { + "epoch": 0.33017207665232695, + "loss": 0.05781165882945061, + "loss_ce": 0.001880569034256041, + "loss_iou": 0.318359375, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 327496404, + "step": 3377 + }, + { + "epoch": 0.3302698474775127, + "grad_norm": 4.851697067351721, + "learning_rate": 5e-05, + "loss": 0.056, + "num_input_tokens_seen": 327593076, + "step": 3378 + }, + { + "epoch": 0.3302698474775127, + "loss": 0.0658797174692154, + "loss_ce": 0.006446728482842445, + "loss_iou": 0.2890625, + "loss_num": 0.01190185546875, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 327593076, + "step": 3378 + }, + { + "epoch": 0.33036761830269845, + "grad_norm": 9.234791970085867, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 327690644, + "step": 3379 + }, + { + "epoch": 0.33036761830269845, + "loss": 0.07563400268554688, + "loss_ce": 0.0012931755045428872, + "loss_iou": 0.314453125, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 327690644, + "step": 3379 + }, + { + "epoch": 0.33046538912788426, + "grad_norm": 8.922086629244214, + "learning_rate": 5e-05, + "loss": 0.1038, + "num_input_tokens_seen": 327787688, + "step": 3380 + }, + { + "epoch": 0.33046538912788426, + "loss": 0.1145167350769043, + "loss_ce": 0.003371722996234894, + "loss_iou": 0.349609375, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 327787688, + "step": 3380 + }, + { + "epoch": 0.33056315995307, + "grad_norm": 9.65409623561922, + "learning_rate": 5e-05, + "loss": 0.1044, + "num_input_tokens_seen": 327884532, + "step": 3381 + }, + { + "epoch": 0.33056315995307, + "loss": 0.09548459202051163, + "loss_ce": 0.004187442362308502, + "loss_iou": 0.2392578125, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 327884532, + "step": 3381 + }, + { + "epoch": 0.33066093077825576, + "grad_norm": 8.898126060759099, + "learning_rate": 5e-05, + "loss": 0.0956, + "num_input_tokens_seen": 327981576, + "step": 3382 + }, + { + "epoch": 0.33066093077825576, + "loss": 0.09167651832103729, + "loss_ce": 0.004030038136988878, + "loss_iou": 0.578125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 327981576, + "step": 3382 + }, + { + "epoch": 0.3307587016034415, + "grad_norm": 6.749793684168006, + "learning_rate": 5e-05, + "loss": 0.0937, + "num_input_tokens_seen": 328079464, + "step": 3383 + }, + { + "epoch": 0.3307587016034415, + "loss": 0.0875474065542221, + "loss_ce": 0.0018693041056394577, + "loss_iou": 0.349609375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 328079464, + "step": 3383 + }, + { + "epoch": 0.3308564724286273, + "grad_norm": 5.874525575157, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 328176416, + "step": 3384 + }, + { + "epoch": 0.3308564724286273, + "loss": 0.06464605778455734, + "loss_ce": 0.010111141949892044, + "loss_iou": 0.279296875, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 328176416, + "step": 3384 + }, + { + "epoch": 0.33095424325381306, + "grad_norm": 6.92624460535086, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 328273600, + "step": 3385 + }, + { + "epoch": 0.33095424325381306, + "loss": 0.083057701587677, + "loss_ce": 0.008701616898179054, + "loss_iou": 0.255859375, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 328273600, + "step": 3385 + }, + { + "epoch": 0.3310520140789988, + "grad_norm": 14.334597199471201, + "learning_rate": 5e-05, + "loss": 0.0922, + "num_input_tokens_seen": 328370436, + "step": 3386 + }, + { + "epoch": 0.3310520140789988, + "loss": 0.09019351005554199, + "loss_ce": 0.004622222855687141, + "loss_iou": 0.2890625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 328370436, + "step": 3386 + }, + { + "epoch": 0.3311497849041846, + "grad_norm": 12.265739408841702, + "learning_rate": 5e-05, + "loss": 0.1127, + "num_input_tokens_seen": 328467508, + "step": 3387 + }, + { + "epoch": 0.3311497849041846, + "loss": 0.12559664249420166, + "loss_ce": 0.004731776192784309, + "loss_iou": 0.326171875, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 328467508, + "step": 3387 + }, + { + "epoch": 0.33124755572937037, + "grad_norm": 8.446722411379683, + "learning_rate": 5e-05, + "loss": 0.0622, + "num_input_tokens_seen": 328563904, + "step": 3388 + }, + { + "epoch": 0.33124755572937037, + "loss": 0.046094637364149094, + "loss_ce": 0.006910066120326519, + "loss_iou": 0.28515625, + "loss_num": 0.00787353515625, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 328563904, + "step": 3388 + }, + { + "epoch": 0.3313453265545561, + "grad_norm": 2.3902636675142905, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 328661028, + "step": 3389 + }, + { + "epoch": 0.3313453265545561, + "loss": 0.06493104994297028, + "loss_ce": 0.005025048274546862, + "loss_iou": 0.30078125, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 328661028, + "step": 3389 + }, + { + "epoch": 0.33144309737974187, + "grad_norm": 4.839876988832669, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 328757996, + "step": 3390 + }, + { + "epoch": 0.33144309737974187, + "loss": 0.08132302761077881, + "loss_ce": 0.00595224229618907, + "loss_iou": 0.361328125, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 328757996, + "step": 3390 + }, + { + "epoch": 0.3315408682049277, + "grad_norm": 6.3316378852532065, + "learning_rate": 5e-05, + "loss": 0.0989, + "num_input_tokens_seen": 328854912, + "step": 3391 + }, + { + "epoch": 0.3315408682049277, + "loss": 0.10288561135530472, + "loss_ce": 0.010341055691242218, + "loss_iou": 0.271484375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 328854912, + "step": 3391 + }, + { + "epoch": 0.3316386390301134, + "grad_norm": 3.4141983083573235, + "learning_rate": 5e-05, + "loss": 0.1106, + "num_input_tokens_seen": 328951296, + "step": 3392 + }, + { + "epoch": 0.3316386390301134, + "loss": 0.13352209329605103, + "loss_ce": 0.006294308230280876, + "loss_iou": 0.341796875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 328951296, + "step": 3392 + }, + { + "epoch": 0.3317364098552992, + "grad_norm": 2.9759685749825353, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 329047800, + "step": 3393 + }, + { + "epoch": 0.3317364098552992, + "loss": 0.08410091698169708, + "loss_ce": 0.003503995481878519, + "loss_iou": 0.283203125, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 329047800, + "step": 3393 + }, + { + "epoch": 0.3318341806804849, + "grad_norm": 18.15574540641699, + "learning_rate": 5e-05, + "loss": 0.1153, + "num_input_tokens_seen": 329145188, + "step": 3394 + }, + { + "epoch": 0.3318341806804849, + "loss": 0.06518451869487762, + "loss_ce": 0.0060796006582677364, + "loss_iou": 0.365234375, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 329145188, + "step": 3394 + }, + { + "epoch": 0.33193195150567073, + "grad_norm": 8.265018346683807, + "learning_rate": 5e-05, + "loss": 0.0995, + "num_input_tokens_seen": 329242808, + "step": 3395 + }, + { + "epoch": 0.33193195150567073, + "loss": 0.09274835884571075, + "loss_ce": 0.007680603768676519, + "loss_iou": 0.33203125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 329242808, + "step": 3395 + }, + { + "epoch": 0.3320297223308565, + "grad_norm": 3.1233892540310597, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 329339848, + "step": 3396 + }, + { + "epoch": 0.3320297223308565, + "loss": 0.05878026783466339, + "loss_ce": 0.004649711307138205, + "loss_iou": 0.2392578125, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 329339848, + "step": 3396 + }, + { + "epoch": 0.33212749315604223, + "grad_norm": 2.925159196443268, + "learning_rate": 5e-05, + "loss": 0.0506, + "num_input_tokens_seen": 329436128, + "step": 3397 + }, + { + "epoch": 0.33212749315604223, + "loss": 0.05187034606933594, + "loss_ce": 0.006940844934433699, + "loss_iou": 0.3046875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 329436128, + "step": 3397 + }, + { + "epoch": 0.332225263981228, + "grad_norm": 75.04834189123423, + "learning_rate": 5e-05, + "loss": 0.1123, + "num_input_tokens_seen": 329532780, + "step": 3398 + }, + { + "epoch": 0.332225263981228, + "loss": 0.09596116840839386, + "loss_ce": 0.001345230033621192, + "loss_iou": 0.318359375, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 329532780, + "step": 3398 + }, + { + "epoch": 0.3323230348064138, + "grad_norm": 48.486383789773605, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 329629076, + "step": 3399 + }, + { + "epoch": 0.3323230348064138, + "loss": 0.09101328998804092, + "loss_ce": 0.009043078869581223, + "loss_iou": 0.341796875, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 329629076, + "step": 3399 + }, + { + "epoch": 0.33242080563159954, + "grad_norm": 4.708483966316917, + "learning_rate": 5e-05, + "loss": 0.113, + "num_input_tokens_seen": 329725528, + "step": 3400 + }, + { + "epoch": 0.33242080563159954, + "loss": 0.10122039914131165, + "loss_ce": 0.010194092057645321, + "loss_iou": 0.326171875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 329725528, + "step": 3400 + }, + { + "epoch": 0.3325185764567853, + "grad_norm": 3.5710629756461985, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 329822112, + "step": 3401 + }, + { + "epoch": 0.3325185764567853, + "loss": 0.08574018627405167, + "loss_ce": 0.007767775095999241, + "loss_iou": 0.390625, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 329822112, + "step": 3401 + }, + { + "epoch": 0.33261634728197104, + "grad_norm": 4.062302774010963, + "learning_rate": 5e-05, + "loss": 0.1005, + "num_input_tokens_seen": 329919804, + "step": 3402 + }, + { + "epoch": 0.33261634728197104, + "loss": 0.09199493378400803, + "loss_ce": 0.004668881185352802, + "loss_iou": 0.328125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 329919804, + "step": 3402 + }, + { + "epoch": 0.33271411810715684, + "grad_norm": 8.304067928139135, + "learning_rate": 5e-05, + "loss": 0.1098, + "num_input_tokens_seen": 330015832, + "step": 3403 + }, + { + "epoch": 0.33271411810715684, + "loss": 0.12142196297645569, + "loss_ce": 0.005668797064572573, + "loss_iou": 0.185546875, + "loss_num": 0.0230712890625, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 330015832, + "step": 3403 + }, + { + "epoch": 0.3328118889323426, + "grad_norm": 9.38668916964073, + "learning_rate": 5e-05, + "loss": 0.0572, + "num_input_tokens_seen": 330112496, + "step": 3404 + }, + { + "epoch": 0.3328118889323426, + "loss": 0.05895392596721649, + "loss_ce": 0.004403752274811268, + "loss_iou": 0.380859375, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 330112496, + "step": 3404 + }, + { + "epoch": 0.33290965975752834, + "grad_norm": 5.937107094719287, + "learning_rate": 5e-05, + "loss": 0.1056, + "num_input_tokens_seen": 330209624, + "step": 3405 + }, + { + "epoch": 0.33290965975752834, + "loss": 0.09172612428665161, + "loss_ce": 0.005468190647661686, + "loss_iou": 0.3046875, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 330209624, + "step": 3405 + }, + { + "epoch": 0.3330074305827141, + "grad_norm": 5.703718925590516, + "learning_rate": 5e-05, + "loss": 0.0854, + "num_input_tokens_seen": 330307096, + "step": 3406 + }, + { + "epoch": 0.3330074305827141, + "loss": 0.07762478291988373, + "loss_ce": 0.001971704885363579, + "loss_iou": 0.392578125, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 330307096, + "step": 3406 + }, + { + "epoch": 0.3331052014078999, + "grad_norm": 3.0399748010842824, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 330404356, + "step": 3407 + }, + { + "epoch": 0.3331052014078999, + "loss": 0.07485233247280121, + "loss_ce": 0.005089154001325369, + "loss_iou": 0.26953125, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 330404356, + "step": 3407 + }, + { + "epoch": 0.33320297223308565, + "grad_norm": 3.119578065438363, + "learning_rate": 5e-05, + "loss": 0.0595, + "num_input_tokens_seen": 330501064, + "step": 3408 + }, + { + "epoch": 0.33320297223308565, + "loss": 0.06084754317998886, + "loss_ce": 0.003123542759567499, + "loss_iou": 0.31640625, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 330501064, + "step": 3408 + }, + { + "epoch": 0.3333007430582714, + "grad_norm": 4.496961702172206, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 330597820, + "step": 3409 + }, + { + "epoch": 0.3333007430582714, + "loss": 0.05824188143014908, + "loss_ce": 0.0028372169472277164, + "loss_iou": 0.23828125, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 330597820, + "step": 3409 + }, + { + "epoch": 0.3333985138834572, + "grad_norm": 2.749537311995228, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 330694812, + "step": 3410 + }, + { + "epoch": 0.3333985138834572, + "loss": 0.08926912397146225, + "loss_ce": 0.003774127457290888, + "loss_iou": 0.392578125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 330694812, + "step": 3410 + }, + { + "epoch": 0.33349628470864295, + "grad_norm": 7.353133548451942, + "learning_rate": 5e-05, + "loss": 0.0998, + "num_input_tokens_seen": 330792336, + "step": 3411 + }, + { + "epoch": 0.33349628470864295, + "loss": 0.14333540201187134, + "loss_ce": 0.007425365969538689, + "loss_iou": 0.275390625, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 330792336, + "step": 3411 + }, + { + "epoch": 0.3335940555338287, + "grad_norm": 12.830341707714553, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 330889464, + "step": 3412 + }, + { + "epoch": 0.3335940555338287, + "loss": 0.0690406784415245, + "loss_ce": 0.0020545925945043564, + "loss_iou": 0.36328125, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 330889464, + "step": 3412 + }, + { + "epoch": 0.33369182635901445, + "grad_norm": 63.702414020248305, + "learning_rate": 5e-05, + "loss": 0.0947, + "num_input_tokens_seen": 330986844, + "step": 3413 + }, + { + "epoch": 0.33369182635901445, + "loss": 0.11394184827804565, + "loss_ce": 0.006504716817289591, + "loss_iou": 0.37109375, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 330986844, + "step": 3413 + }, + { + "epoch": 0.33378959718420026, + "grad_norm": 6.884929670329037, + "learning_rate": 5e-05, + "loss": 0.1184, + "num_input_tokens_seen": 331083356, + "step": 3414 + }, + { + "epoch": 0.33378959718420026, + "loss": 0.10123134404420853, + "loss_ce": 0.0037276751827448606, + "loss_iou": 0.3046875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 331083356, + "step": 3414 + }, + { + "epoch": 0.333887368009386, + "grad_norm": 6.219049934527831, + "learning_rate": 5e-05, + "loss": 0.0972, + "num_input_tokens_seen": 331177920, + "step": 3415 + }, + { + "epoch": 0.333887368009386, + "loss": 0.11342307925224304, + "loss_ce": 0.005329812876880169, + "loss_iou": 0.27734375, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 331177920, + "step": 3415 + }, + { + "epoch": 0.33398513883457176, + "grad_norm": 13.228619940162192, + "learning_rate": 5e-05, + "loss": 0.1144, + "num_input_tokens_seen": 331274744, + "step": 3416 + }, + { + "epoch": 0.33398513883457176, + "loss": 0.1468825489282608, + "loss_ce": 0.00958396028727293, + "loss_iou": 0.1962890625, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 331274744, + "step": 3416 + }, + { + "epoch": 0.3340829096597575, + "grad_norm": 15.891712272545993, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 331371248, + "step": 3417 + }, + { + "epoch": 0.3340829096597575, + "loss": 0.0823945552110672, + "loss_ce": 0.0038461301010102034, + "loss_iou": 0.271484375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 331371248, + "step": 3417 + }, + { + "epoch": 0.3341806804849433, + "grad_norm": 8.736608389825033, + "learning_rate": 5e-05, + "loss": 0.1215, + "num_input_tokens_seen": 331467624, + "step": 3418 + }, + { + "epoch": 0.3341806804849433, + "loss": 0.16021883487701416, + "loss_ce": 0.002824431983754039, + "loss_iou": 0.3046875, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 331467624, + "step": 3418 + }, + { + "epoch": 0.33427845131012907, + "grad_norm": 8.77120023103999, + "learning_rate": 5e-05, + "loss": 0.0846, + "num_input_tokens_seen": 331563956, + "step": 3419 + }, + { + "epoch": 0.33427845131012907, + "loss": 0.05967369303107262, + "loss_ce": 0.0021328008733689785, + "loss_iou": 0.298828125, + "loss_num": 0.011474609375, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 331563956, + "step": 3419 + }, + { + "epoch": 0.3343762221353148, + "grad_norm": 7.260943645864516, + "learning_rate": 5e-05, + "loss": 0.063, + "num_input_tokens_seen": 331660536, + "step": 3420 + }, + { + "epoch": 0.3343762221353148, + "loss": 0.07144039869308472, + "loss_ce": 0.0038782921619713306, + "loss_iou": 0.341796875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 331660536, + "step": 3420 + }, + { + "epoch": 0.33447399296050057, + "grad_norm": 12.51366642794558, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 331757756, + "step": 3421 + }, + { + "epoch": 0.33447399296050057, + "loss": 0.07690881937742233, + "loss_ce": 0.005100954324007034, + "loss_iou": 0.361328125, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 331757756, + "step": 3421 + }, + { + "epoch": 0.33457176378568637, + "grad_norm": 23.310773372318888, + "learning_rate": 5e-05, + "loss": 0.1207, + "num_input_tokens_seen": 331855440, + "step": 3422 + }, + { + "epoch": 0.33457176378568637, + "loss": 0.09663371741771698, + "loss_ce": 0.004424853250384331, + "loss_iou": 0.41015625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 331855440, + "step": 3422 + }, + { + "epoch": 0.3346695346108721, + "grad_norm": 37.92306623838122, + "learning_rate": 5e-05, + "loss": 0.1179, + "num_input_tokens_seen": 331950980, + "step": 3423 + }, + { + "epoch": 0.3346695346108721, + "loss": 0.14188465476036072, + "loss_ce": 0.029366357252001762, + "loss_iou": 0.23828125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 331950980, + "step": 3423 + }, + { + "epoch": 0.33476730543605787, + "grad_norm": 12.431689751935034, + "learning_rate": 5e-05, + "loss": 0.1052, + "num_input_tokens_seen": 332048888, + "step": 3424 + }, + { + "epoch": 0.33476730543605787, + "loss": 0.09653589874505997, + "loss_ce": 0.004021863453090191, + "loss_iou": 0.388671875, + "loss_num": 0.0185546875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 332048888, + "step": 3424 + }, + { + "epoch": 0.3348650762612436, + "grad_norm": 5.003272627444787, + "learning_rate": 5e-05, + "loss": 0.1168, + "num_input_tokens_seen": 332146020, + "step": 3425 + }, + { + "epoch": 0.3348650762612436, + "loss": 0.1108865737915039, + "loss_ce": 0.009049413725733757, + "loss_iou": 0.32421875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 332146020, + "step": 3425 + }, + { + "epoch": 0.3349628470864294, + "grad_norm": 11.867005574644764, + "learning_rate": 5e-05, + "loss": 0.1109, + "num_input_tokens_seen": 332243104, + "step": 3426 + }, + { + "epoch": 0.3349628470864294, + "loss": 0.10052184760570526, + "loss_ce": 0.007641593925654888, + "loss_iou": 0.369140625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 332243104, + "step": 3426 + }, + { + "epoch": 0.3350606179116152, + "grad_norm": 9.650084697257329, + "learning_rate": 5e-05, + "loss": 0.0817, + "num_input_tokens_seen": 332339608, + "step": 3427 + }, + { + "epoch": 0.3350606179116152, + "loss": 0.08236833661794662, + "loss_ce": 0.0016646023141220212, + "loss_iou": 0.265625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 332339608, + "step": 3427 + }, + { + "epoch": 0.3351583887368009, + "grad_norm": 7.146829928989876, + "learning_rate": 5e-05, + "loss": 0.1165, + "num_input_tokens_seen": 332435844, + "step": 3428 + }, + { + "epoch": 0.3351583887368009, + "loss": 0.11834178864955902, + "loss_ce": 0.00783764198422432, + "loss_iou": 0.310546875, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 332435844, + "step": 3428 + }, + { + "epoch": 0.3352561595619867, + "grad_norm": 3.3617598569562097, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 332532592, + "step": 3429 + }, + { + "epoch": 0.3352561595619867, + "loss": 0.09891248494386673, + "loss_ce": 0.005116707179695368, + "loss_iou": 0.296875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 332532592, + "step": 3429 + }, + { + "epoch": 0.3353539303871725, + "grad_norm": 15.551003392166479, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 332629032, + "step": 3430 + }, + { + "epoch": 0.3353539303871725, + "loss": 0.06697332113981247, + "loss_ce": 0.005724538583308458, + "loss_iou": 0.361328125, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 332629032, + "step": 3430 + }, + { + "epoch": 0.33545170121235823, + "grad_norm": 27.704780328184444, + "learning_rate": 5e-05, + "loss": 0.1167, + "num_input_tokens_seen": 332726228, + "step": 3431 + }, + { + "epoch": 0.33545170121235823, + "loss": 0.14563584327697754, + "loss_ce": 0.004812472499907017, + "loss_iou": 0.326171875, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 332726228, + "step": 3431 + }, + { + "epoch": 0.335549472037544, + "grad_norm": 12.518265583126347, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 332822860, + "step": 3432 + }, + { + "epoch": 0.335549472037544, + "loss": 0.09481556713581085, + "loss_ce": 0.004468278959393501, + "loss_iou": 0.359375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 332822860, + "step": 3432 + }, + { + "epoch": 0.3356472428627298, + "grad_norm": 11.033242094533495, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 332920276, + "step": 3433 + }, + { + "epoch": 0.3356472428627298, + "loss": 0.079840287566185, + "loss_ce": 0.0076967328786849976, + "loss_iou": 0.439453125, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 332920276, + "step": 3433 + }, + { + "epoch": 0.33574501368791554, + "grad_norm": 10.527522345067462, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 333016356, + "step": 3434 + }, + { + "epoch": 0.33574501368791554, + "loss": 0.06174955889582634, + "loss_ce": 0.010256869718432426, + "loss_iou": 0.2451171875, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 333016356, + "step": 3434 + }, + { + "epoch": 0.3358427845131013, + "grad_norm": 3.7396133963016687, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 333113352, + "step": 3435 + }, + { + "epoch": 0.3358427845131013, + "loss": 0.06555318832397461, + "loss_ce": 0.00688314251601696, + "loss_iou": 0.2333984375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 333113352, + "step": 3435 + }, + { + "epoch": 0.33594055533828704, + "grad_norm": 11.737754353401291, + "learning_rate": 5e-05, + "loss": 0.0787, + "num_input_tokens_seen": 333209780, + "step": 3436 + }, + { + "epoch": 0.33594055533828704, + "loss": 0.060130394995212555, + "loss_ce": 0.00674752052873373, + "loss_iou": 0.2890625, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 333209780, + "step": 3436 + }, + { + "epoch": 0.33603832616347284, + "grad_norm": 16.092994677025153, + "learning_rate": 5e-05, + "loss": 0.0599, + "num_input_tokens_seen": 333306948, + "step": 3437 + }, + { + "epoch": 0.33603832616347284, + "loss": 0.06436021625995636, + "loss_ce": 0.0017305160872638226, + "loss_iou": 0.234375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 333306948, + "step": 3437 + }, + { + "epoch": 0.3361360969886586, + "grad_norm": 9.174705887054488, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 333403884, + "step": 3438 + }, + { + "epoch": 0.3361360969886586, + "loss": 0.08645398914813995, + "loss_ce": 0.008657054975628853, + "loss_iou": 0.38671875, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 333403884, + "step": 3438 + }, + { + "epoch": 0.33623386781384434, + "grad_norm": 12.168670361019139, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 333500768, + "step": 3439 + }, + { + "epoch": 0.33623386781384434, + "loss": 0.06482192873954773, + "loss_ce": 0.002840731292963028, + "loss_iou": 0.29296875, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 333500768, + "step": 3439 + }, + { + "epoch": 0.3363316386390301, + "grad_norm": 22.674299049110076, + "learning_rate": 5e-05, + "loss": 0.1296, + "num_input_tokens_seen": 333598056, + "step": 3440 + }, + { + "epoch": 0.3363316386390301, + "loss": 0.09474842250347137, + "loss_ce": 0.0056676110252738, + "loss_iou": 0.359375, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 333598056, + "step": 3440 + }, + { + "epoch": 0.3364294094642159, + "grad_norm": 25.549310108947644, + "learning_rate": 5e-05, + "loss": 0.119, + "num_input_tokens_seen": 333695280, + "step": 3441 + }, + { + "epoch": 0.3364294094642159, + "loss": 0.1235649585723877, + "loss_ce": 0.007308248430490494, + "loss_iou": 0.337890625, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 333695280, + "step": 3441 + }, + { + "epoch": 0.33652718028940165, + "grad_norm": 14.31655366747238, + "learning_rate": 5e-05, + "loss": 0.0933, + "num_input_tokens_seen": 333792464, + "step": 3442 + }, + { + "epoch": 0.33652718028940165, + "loss": 0.09629244357347488, + "loss_ce": 0.004098840523511171, + "loss_iou": 0.322265625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 333792464, + "step": 3442 + }, + { + "epoch": 0.3366249511145874, + "grad_norm": 7.579825657816507, + "learning_rate": 5e-05, + "loss": 0.0933, + "num_input_tokens_seen": 333888844, + "step": 3443 + }, + { + "epoch": 0.3366249511145874, + "loss": 0.058937087655067444, + "loss_ce": 0.006439222022891045, + "loss_iou": 0.259765625, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 333888844, + "step": 3443 + }, + { + "epoch": 0.33672272193977315, + "grad_norm": 9.41280289936419, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 333985996, + "step": 3444 + }, + { + "epoch": 0.33672272193977315, + "loss": 0.07067453116178513, + "loss_ce": 0.0037952607963234186, + "loss_iou": 0.29296875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 333985996, + "step": 3444 + }, + { + "epoch": 0.33682049276495896, + "grad_norm": 12.563076890149397, + "learning_rate": 5e-05, + "loss": 0.0989, + "num_input_tokens_seen": 334082720, + "step": 3445 + }, + { + "epoch": 0.33682049276495896, + "loss": 0.09474747627973557, + "loss_ce": 0.009023597463965416, + "loss_iou": 0.349609375, + "loss_num": 0.01708984375, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 334082720, + "step": 3445 + }, + { + "epoch": 0.3369182635901447, + "grad_norm": 14.308060216998689, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 334179896, + "step": 3446 + }, + { + "epoch": 0.3369182635901447, + "loss": 0.08629143983125687, + "loss_ce": 0.006552821956574917, + "loss_iou": 0.36328125, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 334179896, + "step": 3446 + }, + { + "epoch": 0.33701603441533046, + "grad_norm": 23.65442853781865, + "learning_rate": 5e-05, + "loss": 0.1192, + "num_input_tokens_seen": 334276008, + "step": 3447 + }, + { + "epoch": 0.33701603441533046, + "loss": 0.12950792908668518, + "loss_ce": 0.004065422806888819, + "loss_iou": 0.322265625, + "loss_num": 0.025146484375, + "loss_xval": 0.125, + "num_input_tokens_seen": 334276008, + "step": 3447 + }, + { + "epoch": 0.3371138052405162, + "grad_norm": 5.170706021452841, + "learning_rate": 5e-05, + "loss": 0.0631, + "num_input_tokens_seen": 334373584, + "step": 3448 + }, + { + "epoch": 0.3371138052405162, + "loss": 0.07714448869228363, + "loss_ce": 0.006114826072007418, + "loss_iou": 0.388671875, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 334373584, + "step": 3448 + }, + { + "epoch": 0.337211576065702, + "grad_norm": 6.267762813473657, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 334470832, + "step": 3449 + }, + { + "epoch": 0.337211576065702, + "loss": 0.11217939853668213, + "loss_ce": 0.004849075339734554, + "loss_iou": 0.2197265625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 334470832, + "step": 3449 + }, + { + "epoch": 0.33730934689088776, + "grad_norm": 6.398995847606165, + "learning_rate": 5e-05, + "loss": 0.0908, + "num_input_tokens_seen": 334567828, + "step": 3450 + }, + { + "epoch": 0.33730934689088776, + "loss": 0.0603344589471817, + "loss_ce": 0.00331236538477242, + "loss_iou": 0.24609375, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 334567828, + "step": 3450 + }, + { + "epoch": 0.3374071177160735, + "grad_norm": 22.76239193752742, + "learning_rate": 5e-05, + "loss": 0.0917, + "num_input_tokens_seen": 334664336, + "step": 3451 + }, + { + "epoch": 0.3374071177160735, + "loss": 0.11721906065940857, + "loss_ce": 0.0051585170440375805, + "loss_iou": 0.212890625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 334664336, + "step": 3451 + }, + { + "epoch": 0.33750488854125926, + "grad_norm": 3.4957761030791987, + "learning_rate": 5e-05, + "loss": 0.1022, + "num_input_tokens_seen": 334760332, + "step": 3452 + }, + { + "epoch": 0.33750488854125926, + "loss": 0.11063912510871887, + "loss_ce": 0.0028510461561381817, + "loss_iou": 0.298828125, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 334760332, + "step": 3452 + }, + { + "epoch": 0.33760265936644507, + "grad_norm": 15.974647125340441, + "learning_rate": 5e-05, + "loss": 0.0921, + "num_input_tokens_seen": 334858160, + "step": 3453 + }, + { + "epoch": 0.33760265936644507, + "loss": 0.10232706367969513, + "loss_ce": 0.006807044614106417, + "loss_iou": 0.28515625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 334858160, + "step": 3453 + }, + { + "epoch": 0.3377004301916308, + "grad_norm": 8.751427839521313, + "learning_rate": 5e-05, + "loss": 0.1447, + "num_input_tokens_seen": 334955404, + "step": 3454 + }, + { + "epoch": 0.3377004301916308, + "loss": 0.16834750771522522, + "loss_ce": 0.007733498699963093, + "loss_iou": 0.365234375, + "loss_num": 0.0322265625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 334955404, + "step": 3454 + }, + { + "epoch": 0.33779820101681657, + "grad_norm": 4.54018347172219, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 335052896, + "step": 3455 + }, + { + "epoch": 0.33779820101681657, + "loss": 0.04093010351061821, + "loss_ce": 0.00410492205992341, + "loss_iou": 0.291015625, + "loss_num": 0.007354736328125, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 335052896, + "step": 3455 + }, + { + "epoch": 0.3378959718420024, + "grad_norm": 13.318919957589824, + "learning_rate": 5e-05, + "loss": 0.1042, + "num_input_tokens_seen": 335149920, + "step": 3456 + }, + { + "epoch": 0.3378959718420024, + "loss": 0.12076964974403381, + "loss_ce": 0.006679685320705175, + "loss_iou": 0.30078125, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 335149920, + "step": 3456 + }, + { + "epoch": 0.3379937426671881, + "grad_norm": 6.9937742419740445, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 335247068, + "step": 3457 + }, + { + "epoch": 0.3379937426671881, + "loss": 0.10293518006801605, + "loss_ce": 0.008788453415036201, + "loss_iou": 0.3515625, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 335247068, + "step": 3457 + }, + { + "epoch": 0.3380915134923739, + "grad_norm": 34.217851716940594, + "learning_rate": 5e-05, + "loss": 0.1305, + "num_input_tokens_seen": 335343896, + "step": 3458 + }, + { + "epoch": 0.3380915134923739, + "loss": 0.12025655061006546, + "loss_ce": 0.007295734249055386, + "loss_iou": 0.330078125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 335343896, + "step": 3458 + }, + { + "epoch": 0.3381892843175596, + "grad_norm": 18.55899060943806, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 335441380, + "step": 3459 + }, + { + "epoch": 0.3381892843175596, + "loss": 0.09935591369867325, + "loss_ce": 0.002645703963935375, + "loss_iou": 0.2314453125, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 335441380, + "step": 3459 + }, + { + "epoch": 0.33828705514274543, + "grad_norm": 10.159390304985092, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 335537884, + "step": 3460 + }, + { + "epoch": 0.33828705514274543, + "loss": 0.09630366414785385, + "loss_ce": 0.0031029819510877132, + "loss_iou": 0.296875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 335537884, + "step": 3460 + }, + { + "epoch": 0.3383848259679312, + "grad_norm": 9.015974801634757, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 335635552, + "step": 3461 + }, + { + "epoch": 0.3383848259679312, + "loss": 0.09106113016605377, + "loss_ce": 0.005306739825755358, + "loss_iou": 0.349609375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 335635552, + "step": 3461 + }, + { + "epoch": 0.33848259679311693, + "grad_norm": 5.710620151987727, + "learning_rate": 5e-05, + "loss": 0.1009, + "num_input_tokens_seen": 335733960, + "step": 3462 + }, + { + "epoch": 0.33848259679311693, + "loss": 0.10044974833726883, + "loss_ce": 0.003480146173387766, + "loss_iou": 0.451171875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 335733960, + "step": 3462 + }, + { + "epoch": 0.3385803676183027, + "grad_norm": 11.517246151942802, + "learning_rate": 5e-05, + "loss": 0.073, + "num_input_tokens_seen": 335831696, + "step": 3463 + }, + { + "epoch": 0.3385803676183027, + "loss": 0.08778886497020721, + "loss_ce": 0.00316362245939672, + "loss_iou": 0.427734375, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 335831696, + "step": 3463 + }, + { + "epoch": 0.3386781384434885, + "grad_norm": 24.536592038139975, + "learning_rate": 5e-05, + "loss": 0.1069, + "num_input_tokens_seen": 335928456, + "step": 3464 + }, + { + "epoch": 0.3386781384434885, + "loss": 0.09283918142318726, + "loss_ce": 0.008454261347651482, + "loss_iou": 0.244140625, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 335928456, + "step": 3464 + }, + { + "epoch": 0.33877590926867424, + "grad_norm": 23.017684663116363, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 336026452, + "step": 3465 + }, + { + "epoch": 0.33877590926867424, + "loss": 0.08105643093585968, + "loss_ce": 0.006059480365365744, + "loss_iou": 0.40234375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 336026452, + "step": 3465 + }, + { + "epoch": 0.33887368009386, + "grad_norm": 12.831824895321189, + "learning_rate": 5e-05, + "loss": 0.0872, + "num_input_tokens_seen": 336123872, + "step": 3466 + }, + { + "epoch": 0.33887368009386, + "loss": 0.07514851540327072, + "loss_ce": 0.002364093903452158, + "loss_iou": 0.42578125, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 336123872, + "step": 3466 + }, + { + "epoch": 0.33897145091904574, + "grad_norm": 6.641755383921229, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 336221928, + "step": 3467 + }, + { + "epoch": 0.33897145091904574, + "loss": 0.07386421412229538, + "loss_ce": 0.0048944903537631035, + "loss_iou": 0.349609375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 336221928, + "step": 3467 + }, + { + "epoch": 0.33906922174423154, + "grad_norm": 6.469298770706658, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 336318728, + "step": 3468 + }, + { + "epoch": 0.33906922174423154, + "loss": 0.06437714397907257, + "loss_ce": 0.00305207259953022, + "loss_iou": 0.45703125, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 336318728, + "step": 3468 + }, + { + "epoch": 0.3391669925694173, + "grad_norm": 3.4904784718889865, + "learning_rate": 5e-05, + "loss": 0.1026, + "num_input_tokens_seen": 336416200, + "step": 3469 + }, + { + "epoch": 0.3391669925694173, + "loss": 0.07125195860862732, + "loss_ce": 0.004784672055393457, + "loss_iou": 0.24609375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 336416200, + "step": 3469 + }, + { + "epoch": 0.33926476339460304, + "grad_norm": 3.142426567956408, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 336513528, + "step": 3470 + }, + { + "epoch": 0.33926476339460304, + "loss": 0.06562620401382446, + "loss_ce": 0.0035534449853003025, + "loss_iou": 0.345703125, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 336513528, + "step": 3470 + }, + { + "epoch": 0.3393625342197888, + "grad_norm": 11.78531625771305, + "learning_rate": 5e-05, + "loss": 0.0922, + "num_input_tokens_seen": 336610208, + "step": 3471 + }, + { + "epoch": 0.3393625342197888, + "loss": 0.04977841675281525, + "loss_ce": 0.005299046635627747, + "loss_iou": 0.259765625, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 336610208, + "step": 3471 + }, + { + "epoch": 0.3394603050449746, + "grad_norm": 27.1961737294893, + "learning_rate": 5e-05, + "loss": 0.1148, + "num_input_tokens_seen": 336706988, + "step": 3472 + }, + { + "epoch": 0.3394603050449746, + "loss": 0.11392057687044144, + "loss_ce": 0.005712876096367836, + "loss_iou": 0.29296875, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 336706988, + "step": 3472 + }, + { + "epoch": 0.33955807587016035, + "grad_norm": 3.1545367483525717, + "learning_rate": 5e-05, + "loss": 0.0991, + "num_input_tokens_seen": 336803564, + "step": 3473 + }, + { + "epoch": 0.33955807587016035, + "loss": 0.0959092229604721, + "loss_ce": 0.004325972404330969, + "loss_iou": 0.322265625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 336803564, + "step": 3473 + }, + { + "epoch": 0.3396558466953461, + "grad_norm": 12.08909496402165, + "learning_rate": 5e-05, + "loss": 0.1275, + "num_input_tokens_seen": 336900484, + "step": 3474 + }, + { + "epoch": 0.3396558466953461, + "loss": 0.11198759078979492, + "loss_ce": 0.002688889391720295, + "loss_iou": 0.453125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 336900484, + "step": 3474 + }, + { + "epoch": 0.33975361752053185, + "grad_norm": 7.363278063268369, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 336996972, + "step": 3475 + }, + { + "epoch": 0.33975361752053185, + "loss": 0.0977981686592102, + "loss_ce": 0.0018814157228916883, + "loss_iou": 0.388671875, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 336996972, + "step": 3475 + }, + { + "epoch": 0.33985138834571765, + "grad_norm": 12.407205129261518, + "learning_rate": 5e-05, + "loss": 0.1029, + "num_input_tokens_seen": 337094668, + "step": 3476 + }, + { + "epoch": 0.33985138834571765, + "loss": 0.13819052278995514, + "loss_ce": 0.00485922209918499, + "loss_iou": 0.37890625, + "loss_num": 0.026611328125, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 337094668, + "step": 3476 + }, + { + "epoch": 0.3399491591709034, + "grad_norm": 25.195262368201668, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 337191408, + "step": 3477 + }, + { + "epoch": 0.3399491591709034, + "loss": 0.0949336439371109, + "loss_ce": 0.005501881241798401, + "loss_iou": 0.33203125, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 337191408, + "step": 3477 + }, + { + "epoch": 0.34004692999608915, + "grad_norm": 9.88866810767011, + "learning_rate": 5e-05, + "loss": 0.1244, + "num_input_tokens_seen": 337288044, + "step": 3478 + }, + { + "epoch": 0.34004692999608915, + "loss": 0.10215680301189423, + "loss_ce": 0.003394288709387183, + "loss_iou": 0.453125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 337288044, + "step": 3478 + }, + { + "epoch": 0.34014470082127496, + "grad_norm": 2.3671582294853732, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 337384328, + "step": 3479 + }, + { + "epoch": 0.34014470082127496, + "loss": 0.05403449386358261, + "loss_ce": 0.009623789228498936, + "loss_iou": 0.27734375, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 337384328, + "step": 3479 + }, + { + "epoch": 0.3402424716464607, + "grad_norm": 7.019175980291536, + "learning_rate": 5e-05, + "loss": 0.0845, + "num_input_tokens_seen": 337481376, + "step": 3480 + }, + { + "epoch": 0.3402424716464607, + "loss": 0.059158556163311005, + "loss_ce": 0.006744615733623505, + "loss_iou": 0.27734375, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 337481376, + "step": 3480 + }, + { + "epoch": 0.34034024247164646, + "grad_norm": 6.948406177504064, + "learning_rate": 5e-05, + "loss": 0.1053, + "num_input_tokens_seen": 337577960, + "step": 3481 + }, + { + "epoch": 0.34034024247164646, + "loss": 0.10107292234897614, + "loss_ce": 0.00983680784702301, + "loss_iou": 0.20703125, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 337577960, + "step": 3481 + }, + { + "epoch": 0.3404380132968322, + "grad_norm": 23.383076216514887, + "learning_rate": 5e-05, + "loss": 0.0718, + "num_input_tokens_seen": 337674220, + "step": 3482 + }, + { + "epoch": 0.3404380132968322, + "loss": 0.07103852927684784, + "loss_ce": 0.0071347239427268505, + "loss_iou": 0.2490234375, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 337674220, + "step": 3482 + }, + { + "epoch": 0.340535784122018, + "grad_norm": 6.300054323190865, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 337771792, + "step": 3483 + }, + { + "epoch": 0.340535784122018, + "loss": 0.05479712784290314, + "loss_ce": 0.003146126866340637, + "loss_iou": 0.31640625, + "loss_num": 0.01031494140625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 337771792, + "step": 3483 + }, + { + "epoch": 0.34063355494720376, + "grad_norm": 6.914198695993488, + "learning_rate": 5e-05, + "loss": 0.083, + "num_input_tokens_seen": 337869892, + "step": 3484 + }, + { + "epoch": 0.34063355494720376, + "loss": 0.07211393862962723, + "loss_ce": 0.004654832184314728, + "loss_iou": 0.40234375, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 337869892, + "step": 3484 + }, + { + "epoch": 0.3407313257723895, + "grad_norm": 6.545258150404917, + "learning_rate": 5e-05, + "loss": 0.0928, + "num_input_tokens_seen": 337966140, + "step": 3485 + }, + { + "epoch": 0.3407313257723895, + "loss": 0.1131124496459961, + "loss_ce": 0.005812639836221933, + "loss_iou": 0.3046875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 337966140, + "step": 3485 + }, + { + "epoch": 0.34082909659757527, + "grad_norm": 7.0635032552686825, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 338062704, + "step": 3486 + }, + { + "epoch": 0.34082909659757527, + "loss": 0.04785019904375076, + "loss_ce": 0.003065655939280987, + "loss_iou": 0.326171875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 338062704, + "step": 3486 + }, + { + "epoch": 0.34092686742276107, + "grad_norm": 4.745579199079935, + "learning_rate": 5e-05, + "loss": 0.1011, + "num_input_tokens_seen": 338160048, + "step": 3487 + }, + { + "epoch": 0.34092686742276107, + "loss": 0.1261129379272461, + "loss_ce": 0.006041525863111019, + "loss_iou": 0.330078125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 338160048, + "step": 3487 + }, + { + "epoch": 0.3410246382479468, + "grad_norm": 10.622122376454424, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 338257228, + "step": 3488 + }, + { + "epoch": 0.3410246382479468, + "loss": 0.05368943512439728, + "loss_ce": 0.002221541479229927, + "loss_iou": 0.32421875, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 338257228, + "step": 3488 + }, + { + "epoch": 0.34112240907313257, + "grad_norm": 20.564112427982845, + "learning_rate": 5e-05, + "loss": 0.0935, + "num_input_tokens_seen": 338353808, + "step": 3489 + }, + { + "epoch": 0.34112240907313257, + "loss": 0.07447470724582672, + "loss_ce": 0.003994361963123083, + "loss_iou": 0.3515625, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 338353808, + "step": 3489 + }, + { + "epoch": 0.3412201798983183, + "grad_norm": 8.658863485487839, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 338450704, + "step": 3490 + }, + { + "epoch": 0.3412201798983183, + "loss": 0.0809309333562851, + "loss_ce": 0.0022871338296681643, + "loss_iou": 0.421875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 338450704, + "step": 3490 + }, + { + "epoch": 0.3413179507235041, + "grad_norm": 8.099278089286255, + "learning_rate": 5e-05, + "loss": 0.1107, + "num_input_tokens_seen": 338547040, + "step": 3491 + }, + { + "epoch": 0.3413179507235041, + "loss": 0.0967583954334259, + "loss_ce": 0.004869967699050903, + "loss_iou": 0.3203125, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 338547040, + "step": 3491 + }, + { + "epoch": 0.3414157215486899, + "grad_norm": 11.720887667197047, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 338643904, + "step": 3492 + }, + { + "epoch": 0.3414157215486899, + "loss": 0.05616620182991028, + "loss_ce": 0.005732092075049877, + "loss_iou": 0.302734375, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 338643904, + "step": 3492 + }, + { + "epoch": 0.3415134923738756, + "grad_norm": 12.47972932239874, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 338740792, + "step": 3493 + }, + { + "epoch": 0.3415134923738756, + "loss": 0.09414905309677124, + "loss_ce": 0.00747913122177124, + "loss_iou": 0.291015625, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 338740792, + "step": 3493 + }, + { + "epoch": 0.3416112631990614, + "grad_norm": 14.429619035233465, + "learning_rate": 5e-05, + "loss": 0.0502, + "num_input_tokens_seen": 338838708, + "step": 3494 + }, + { + "epoch": 0.3416112631990614, + "loss": 0.05417631193995476, + "loss_ce": 0.005714396946132183, + "loss_iou": 0.3125, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 338838708, + "step": 3494 + }, + { + "epoch": 0.3417090340242472, + "grad_norm": 9.872015666866472, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 338935892, + "step": 3495 + }, + { + "epoch": 0.3417090340242472, + "loss": 0.0866013616323471, + "loss_ce": 0.008537395857274532, + "loss_iou": 0.267578125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 338935892, + "step": 3495 + }, + { + "epoch": 0.34180680484943293, + "grad_norm": 5.306148384928763, + "learning_rate": 5e-05, + "loss": 0.0862, + "num_input_tokens_seen": 339032404, + "step": 3496 + }, + { + "epoch": 0.34180680484943293, + "loss": 0.07674093544483185, + "loss_ce": 0.008839319460093975, + "loss_iou": 0.314453125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 339032404, + "step": 3496 + }, + { + "epoch": 0.3419045756746187, + "grad_norm": 10.751737529676204, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 339129640, + "step": 3497 + }, + { + "epoch": 0.3419045756746187, + "loss": 0.08538394421339035, + "loss_ce": 0.004329253453761339, + "loss_iou": 0.3359375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 339129640, + "step": 3497 + }, + { + "epoch": 0.34200234649980443, + "grad_norm": 5.577364019205322, + "learning_rate": 5e-05, + "loss": 0.0516, + "num_input_tokens_seen": 339227036, + "step": 3498 + }, + { + "epoch": 0.34200234649980443, + "loss": 0.04135984182357788, + "loss_ce": 0.0033349408768117428, + "loss_iou": 0.3203125, + "loss_num": 0.007598876953125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 339227036, + "step": 3498 + }, + { + "epoch": 0.34210011732499024, + "grad_norm": 4.397300095222573, + "learning_rate": 5e-05, + "loss": 0.0565, + "num_input_tokens_seen": 339323420, + "step": 3499 + }, + { + "epoch": 0.34210011732499024, + "loss": 0.03148435056209564, + "loss_ce": 0.002384888008236885, + "loss_iou": 0.267578125, + "loss_num": 0.005828857421875, + "loss_xval": 0.029052734375, + "num_input_tokens_seen": 339323420, + "step": 3499 + }, + { + "epoch": 0.342197888150176, + "grad_norm": 18.89400586615014, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 339420592, + "step": 3500 + }, + { + "epoch": 0.342197888150176, + "eval_seeclick_CIoU": 0.4984448254108429, + "eval_seeclick_GIoU": 0.5131930261850357, + "eval_seeclick_IoU": 0.5446740090847015, + "eval_seeclick_MAE_all": 0.07413206622004509, + "eval_seeclick_MAE_h": 0.04578056558966637, + "eval_seeclick_MAE_w": 0.09238791838288307, + "eval_seeclick_MAE_x": 0.11437031254172325, + "eval_seeclick_MAE_y": 0.04398945905268192, + "eval_seeclick_NUM_probability": 0.9999879002571106, + "eval_seeclick_inside_bbox": 0.7542613744735718, + "eval_seeclick_loss": 0.2690686285495758, + "eval_seeclick_loss_ce": 0.010096464306116104, + "eval_seeclick_loss_iou": 0.4439697265625, + "eval_seeclick_loss_num": 0.05094146728515625, + "eval_seeclick_loss_xval": 0.25469970703125, + "eval_seeclick_runtime": 73.5427, + "eval_seeclick_samples_per_second": 0.585, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 339420592, + "step": 3500 + }, + { + "epoch": 0.342197888150176, + "eval_icons_CIoU": 0.636029452085495, + "eval_icons_GIoU": 0.6181391477584839, + "eval_icons_IoU": 0.6715376973152161, + "eval_icons_MAE_all": 0.06780015118420124, + "eval_icons_MAE_h": 0.0808899886906147, + "eval_icons_MAE_w": 0.05526186153292656, + "eval_icons_MAE_x": 0.05385393649339676, + "eval_icons_MAE_y": 0.08119482547044754, + "eval_icons_NUM_probability": 0.9999781250953674, + "eval_icons_inside_bbox": 0.8038194477558136, + "eval_icons_loss": 0.2076062262058258, + "eval_icons_loss_ce": 7.846565949876094e-06, + "eval_icons_loss_iou": 0.3736572265625, + "eval_icons_loss_num": 0.04400634765625, + "eval_icons_loss_xval": 0.220123291015625, + "eval_icons_runtime": 89.9981, + "eval_icons_samples_per_second": 0.556, + "eval_icons_steps_per_second": 0.022, + "num_input_tokens_seen": 339420592, + "step": 3500 + }, + { + "epoch": 0.342197888150176, + "eval_screenspot_CIoU": 0.34019245704015094, + "eval_screenspot_GIoU": 0.31602758665879566, + "eval_screenspot_IoU": 0.42139007647832233, + "eval_screenspot_MAE_all": 0.15132409830888113, + "eval_screenspot_MAE_h": 0.10119166225194931, + "eval_screenspot_MAE_w": 0.21665618071953455, + "eval_screenspot_MAE_x": 0.1883684148391088, + "eval_screenspot_MAE_y": 0.09908014287551244, + "eval_screenspot_NUM_probability": 0.9999720851580302, + "eval_screenspot_inside_bbox": 0.6850000023841858, + "eval_screenspot_loss": 0.5300085544586182, + "eval_screenspot_loss_ce": 0.01429946068674326, + "eval_screenspot_loss_iou": 0.3648274739583333, + "eval_screenspot_loss_num": 0.10389200846354167, + "eval_screenspot_loss_xval": 0.5195719401041666, + "eval_screenspot_runtime": 148.4809, + "eval_screenspot_samples_per_second": 0.599, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 339420592, + "step": 3500 + }, + { + "epoch": 0.342197888150176, + "eval_compot_CIoU": 0.4724971055984497, + "eval_compot_GIoU": 0.4611716866493225, + "eval_compot_IoU": 0.532768577337265, + "eval_compot_MAE_all": 0.0847703367471695, + "eval_compot_MAE_h": 0.06985202431678772, + "eval_compot_MAE_w": 0.10047328472137451, + "eval_compot_MAE_x": 0.09904879704117775, + "eval_compot_MAE_y": 0.06970723159611225, + "eval_compot_NUM_probability": 0.999978244304657, + "eval_compot_inside_bbox": 0.7118055522441864, + "eval_compot_loss": 0.2772562503814697, + "eval_compot_loss_ce": 0.01804725732654333, + "eval_compot_loss_iou": 0.5430908203125, + "eval_compot_loss_num": 0.045337677001953125, + "eval_compot_loss_xval": 0.2266387939453125, + "eval_compot_runtime": 87.2054, + "eval_compot_samples_per_second": 0.573, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 339420592, + "step": 3500 + }, + { + "epoch": 0.342197888150176, + "loss": 0.22242633998394012, + "loss_ce": 0.018752021715044975, + "loss_iou": 0.55078125, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 339420592, + "step": 3500 + }, + { + "epoch": 0.34229565897536174, + "grad_norm": 7.674844847733392, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 339517600, + "step": 3501 + }, + { + "epoch": 0.34229565897536174, + "loss": 0.08392718434333801, + "loss_ce": 0.005298648029565811, + "loss_iou": 0.259765625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 339517600, + "step": 3501 + }, + { + "epoch": 0.34239342980054754, + "grad_norm": 12.39751415495033, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 339614928, + "step": 3502 + }, + { + "epoch": 0.34239342980054754, + "loss": 0.05546003207564354, + "loss_ce": 0.0055332728661596775, + "loss_iou": 0.404296875, + "loss_num": 0.010009765625, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 339614928, + "step": 3502 + }, + { + "epoch": 0.3424912006257333, + "grad_norm": 22.543185738089928, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 339712748, + "step": 3503 + }, + { + "epoch": 0.3424912006257333, + "loss": 0.09108978509902954, + "loss_ce": 0.002070007845759392, + "loss_iou": 0.3359375, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 339712748, + "step": 3503 + }, + { + "epoch": 0.34258897145091904, + "grad_norm": 10.959354324926819, + "learning_rate": 5e-05, + "loss": 0.1213, + "num_input_tokens_seen": 339809236, + "step": 3504 + }, + { + "epoch": 0.34258897145091904, + "loss": 0.10416872799396515, + "loss_ce": 0.008877594955265522, + "loss_iou": 0.40625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 339809236, + "step": 3504 + }, + { + "epoch": 0.3426867422761048, + "grad_norm": 11.829823757901108, + "learning_rate": 5e-05, + "loss": 0.0988, + "num_input_tokens_seen": 339905764, + "step": 3505 + }, + { + "epoch": 0.3426867422761048, + "loss": 0.06705448031425476, + "loss_ce": 0.007926670834422112, + "loss_iou": 0.30078125, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 339905764, + "step": 3505 + }, + { + "epoch": 0.3427845131012906, + "grad_norm": 4.151933633172038, + "learning_rate": 5e-05, + "loss": 0.1251, + "num_input_tokens_seen": 340003800, + "step": 3506 + }, + { + "epoch": 0.3427845131012906, + "loss": 0.09936024248600006, + "loss_ce": 0.005427137948572636, + "loss_iou": 0.375, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 340003800, + "step": 3506 + }, + { + "epoch": 0.34288228392647635, + "grad_norm": 4.369443782781884, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 340100520, + "step": 3507 + }, + { + "epoch": 0.34288228392647635, + "loss": 0.042465366423130035, + "loss_ce": 0.006126558408141136, + "loss_iou": 0.296875, + "loss_num": 0.00726318359375, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 340100520, + "step": 3507 + }, + { + "epoch": 0.3429800547516621, + "grad_norm": 11.044560064729628, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 340196704, + "step": 3508 + }, + { + "epoch": 0.3429800547516621, + "loss": 0.07976031303405762, + "loss_ce": 0.007403130643069744, + "loss_iou": 0.28125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 340196704, + "step": 3508 + }, + { + "epoch": 0.34307782557684785, + "grad_norm": 16.824387021719776, + "learning_rate": 5e-05, + "loss": 0.1254, + "num_input_tokens_seen": 340294156, + "step": 3509 + }, + { + "epoch": 0.34307782557684785, + "loss": 0.1348009705543518, + "loss_ce": 0.014340451918542385, + "loss_iou": 0.2392578125, + "loss_num": 0.0240478515625, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 340294156, + "step": 3509 + }, + { + "epoch": 0.34317559640203366, + "grad_norm": 8.435377685939967, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 340391480, + "step": 3510 + }, + { + "epoch": 0.34317559640203366, + "loss": 0.10729025304317474, + "loss_ce": 0.005834558978676796, + "loss_iou": 0.333984375, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 340391480, + "step": 3510 + }, + { + "epoch": 0.3432733672272194, + "grad_norm": 3.6547191936512085, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 340488272, + "step": 3511 + }, + { + "epoch": 0.3432733672272194, + "loss": 0.12542426586151123, + "loss_ce": 0.0042465864680707455, + "loss_iou": 0.2294921875, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 340488272, + "step": 3511 + }, + { + "epoch": 0.34337113805240516, + "grad_norm": 3.1172822934475017, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 340586276, + "step": 3512 + }, + { + "epoch": 0.34337113805240516, + "loss": 0.09253869950771332, + "loss_ce": 0.006234990432858467, + "loss_iou": 0.294921875, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 340586276, + "step": 3512 + }, + { + "epoch": 0.3434689088775909, + "grad_norm": 15.087318629890339, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 340682824, + "step": 3513 + }, + { + "epoch": 0.3434689088775909, + "loss": 0.06426943093538284, + "loss_ce": 0.0042413524352014065, + "loss_iou": 0.302734375, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 340682824, + "step": 3513 + }, + { + "epoch": 0.3435666797027767, + "grad_norm": 5.821781940724702, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 340780144, + "step": 3514 + }, + { + "epoch": 0.3435666797027767, + "loss": 0.06373193114995956, + "loss_ce": 0.006145261228084564, + "loss_iou": 0.2353515625, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 340780144, + "step": 3514 + }, + { + "epoch": 0.34366445052796246, + "grad_norm": 4.758140549730514, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 340875800, + "step": 3515 + }, + { + "epoch": 0.34366445052796246, + "loss": 0.06315899640321732, + "loss_ce": 0.00826931744813919, + "loss_iou": 0.185546875, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 340875800, + "step": 3515 + }, + { + "epoch": 0.3437622213531482, + "grad_norm": 4.457113376626105, + "learning_rate": 5e-05, + "loss": 0.1045, + "num_input_tokens_seen": 340972268, + "step": 3516 + }, + { + "epoch": 0.3437622213531482, + "loss": 0.11415558308362961, + "loss_ce": 0.006287388503551483, + "loss_iou": 0.24609375, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 340972268, + "step": 3516 + }, + { + "epoch": 0.34385999217833396, + "grad_norm": 6.2866645886169055, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 341068580, + "step": 3517 + }, + { + "epoch": 0.34385999217833396, + "loss": 0.06608431041240692, + "loss_ce": 0.0026535268407315016, + "loss_iou": 0.322265625, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 341068580, + "step": 3517 + }, + { + "epoch": 0.34395776300351977, + "grad_norm": 6.198785373924154, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 341165672, + "step": 3518 + }, + { + "epoch": 0.34395776300351977, + "loss": 0.07964944839477539, + "loss_ce": 0.0036149024963378906, + "loss_iou": 0.283203125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 341165672, + "step": 3518 + }, + { + "epoch": 0.3440555338287055, + "grad_norm": 23.966035690350594, + "learning_rate": 5e-05, + "loss": 0.1015, + "num_input_tokens_seen": 341263200, + "step": 3519 + }, + { + "epoch": 0.3440555338287055, + "loss": 0.0877918154001236, + "loss_ce": 0.006454833783209324, + "loss_iou": 0.34765625, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 341263200, + "step": 3519 + }, + { + "epoch": 0.34415330465389127, + "grad_norm": 15.111361416446373, + "learning_rate": 5e-05, + "loss": 0.0638, + "num_input_tokens_seen": 341359372, + "step": 3520 + }, + { + "epoch": 0.34415330465389127, + "loss": 0.07092349231243134, + "loss_ce": 0.0039755599573254585, + "loss_iou": 0.224609375, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 341359372, + "step": 3520 + }, + { + "epoch": 0.344251075479077, + "grad_norm": 13.797397132245935, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 341455608, + "step": 3521 + }, + { + "epoch": 0.344251075479077, + "loss": 0.10556932538747787, + "loss_ce": 0.007226427551358938, + "loss_iou": 0.318359375, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 341455608, + "step": 3521 + }, + { + "epoch": 0.3443488463042628, + "grad_norm": 16.486251809731755, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 341552712, + "step": 3522 + }, + { + "epoch": 0.3443488463042628, + "loss": 0.07755762338638306, + "loss_ce": 0.005230958107858896, + "loss_iou": 0.255859375, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 341552712, + "step": 3522 + }, + { + "epoch": 0.3444466171294486, + "grad_norm": 7.823137730034828, + "learning_rate": 5e-05, + "loss": 0.1045, + "num_input_tokens_seen": 341648920, + "step": 3523 + }, + { + "epoch": 0.3444466171294486, + "loss": 0.12961770594120026, + "loss_ce": 0.009286897256970406, + "loss_iou": 0.314453125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 341648920, + "step": 3523 + }, + { + "epoch": 0.3445443879546343, + "grad_norm": 14.497132209211863, + "learning_rate": 5e-05, + "loss": 0.0819, + "num_input_tokens_seen": 341746832, + "step": 3524 + }, + { + "epoch": 0.3445443879546343, + "loss": 0.06257903575897217, + "loss_ce": 0.0064018950797617435, + "loss_iou": 0.265625, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 341746832, + "step": 3524 + }, + { + "epoch": 0.34464215877982013, + "grad_norm": 22.037596370493077, + "learning_rate": 5e-05, + "loss": 0.1053, + "num_input_tokens_seen": 341844172, + "step": 3525 + }, + { + "epoch": 0.34464215877982013, + "loss": 0.1015157476067543, + "loss_ce": 0.004683472216129303, + "loss_iou": 0.349609375, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 341844172, + "step": 3525 + }, + { + "epoch": 0.3447399296050059, + "grad_norm": 10.229839204008313, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 341940600, + "step": 3526 + }, + { + "epoch": 0.3447399296050059, + "loss": 0.07417749613523483, + "loss_ce": 0.009724368341267109, + "loss_iou": 0.26171875, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 341940600, + "step": 3526 + }, + { + "epoch": 0.34483770043019163, + "grad_norm": 3.9279300730310482, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 342036308, + "step": 3527 + }, + { + "epoch": 0.34483770043019163, + "loss": 0.08633589744567871, + "loss_ce": 0.0029618749395012856, + "loss_iou": 0.328125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 342036308, + "step": 3527 + }, + { + "epoch": 0.3449354712553774, + "grad_norm": 2.8628875116570187, + "learning_rate": 5e-05, + "loss": 0.1117, + "num_input_tokens_seen": 342132868, + "step": 3528 + }, + { + "epoch": 0.3449354712553774, + "loss": 0.16209328174591064, + "loss_ce": 0.010176773183047771, + "loss_iou": 0.263671875, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 342132868, + "step": 3528 + }, + { + "epoch": 0.3450332420805632, + "grad_norm": 3.3647474071813344, + "learning_rate": 5e-05, + "loss": 0.0846, + "num_input_tokens_seen": 342231168, + "step": 3529 + }, + { + "epoch": 0.3450332420805632, + "loss": 0.07214190065860748, + "loss_ce": 0.00614763842895627, + "loss_iou": 0.3671875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 342231168, + "step": 3529 + }, + { + "epoch": 0.34513101290574894, + "grad_norm": 8.083391984728362, + "learning_rate": 5e-05, + "loss": 0.088, + "num_input_tokens_seen": 342327692, + "step": 3530 + }, + { + "epoch": 0.34513101290574894, + "loss": 0.08505409955978394, + "loss_ce": 0.003373800776898861, + "loss_iou": 0.2734375, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 342327692, + "step": 3530 + }, + { + "epoch": 0.3452287837309347, + "grad_norm": 14.415085209584134, + "learning_rate": 5e-05, + "loss": 0.0879, + "num_input_tokens_seen": 342424436, + "step": 3531 + }, + { + "epoch": 0.3452287837309347, + "loss": 0.10104763507843018, + "loss_ce": 0.0033303452655673027, + "loss_iou": 0.36328125, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 342424436, + "step": 3531 + }, + { + "epoch": 0.34532655455612044, + "grad_norm": 18.62075680267627, + "learning_rate": 5e-05, + "loss": 0.1448, + "num_input_tokens_seen": 342520320, + "step": 3532 + }, + { + "epoch": 0.34532655455612044, + "loss": 0.17651966214179993, + "loss_ce": 0.004034318029880524, + "loss_iou": 0.30859375, + "loss_num": 0.034423828125, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 342520320, + "step": 3532 + }, + { + "epoch": 0.34542432538130624, + "grad_norm": 6.240274124725423, + "learning_rate": 5e-05, + "loss": 0.0942, + "num_input_tokens_seen": 342616372, + "step": 3533 + }, + { + "epoch": 0.34542432538130624, + "loss": 0.08564409613609314, + "loss_ce": 0.0036128396168351173, + "loss_iou": 0.357421875, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 342616372, + "step": 3533 + }, + { + "epoch": 0.345522096206492, + "grad_norm": 6.576691298208819, + "learning_rate": 5e-05, + "loss": 0.1031, + "num_input_tokens_seen": 342713348, + "step": 3534 + }, + { + "epoch": 0.345522096206492, + "loss": 0.13122843205928802, + "loss_ce": 0.014895430766046047, + "loss_iou": 0.26171875, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 342713348, + "step": 3534 + }, + { + "epoch": 0.34561986703167774, + "grad_norm": 13.166936163209638, + "learning_rate": 5e-05, + "loss": 0.053, + "num_input_tokens_seen": 342810376, + "step": 3535 + }, + { + "epoch": 0.34561986703167774, + "loss": 0.041198261082172394, + "loss_ce": 0.005515581928193569, + "loss_iou": 0.3125, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 342810376, + "step": 3535 + }, + { + "epoch": 0.3457176378568635, + "grad_norm": 6.229344974268844, + "learning_rate": 5e-05, + "loss": 0.0863, + "num_input_tokens_seen": 342907000, + "step": 3536 + }, + { + "epoch": 0.3457176378568635, + "loss": 0.062141433358192444, + "loss_ce": 0.00558092026039958, + "loss_iou": 0.34375, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 342907000, + "step": 3536 + }, + { + "epoch": 0.3458154086820493, + "grad_norm": 3.488101699745254, + "learning_rate": 5e-05, + "loss": 0.0933, + "num_input_tokens_seen": 343003440, + "step": 3537 + }, + { + "epoch": 0.3458154086820493, + "loss": 0.09949390590190887, + "loss_ce": 0.007483410649001598, + "loss_iou": 0.310546875, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 343003440, + "step": 3537 + }, + { + "epoch": 0.34591317950723505, + "grad_norm": 5.442881416421809, + "learning_rate": 5e-05, + "loss": 0.1028, + "num_input_tokens_seen": 343101452, + "step": 3538 + }, + { + "epoch": 0.34591317950723505, + "loss": 0.07920458912849426, + "loss_ce": 0.0027275364845991135, + "loss_iou": 0.419921875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 343101452, + "step": 3538 + }, + { + "epoch": 0.3460109503324208, + "grad_norm": 10.454753363371363, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 343198880, + "step": 3539 + }, + { + "epoch": 0.3460109503324208, + "loss": 0.0758609026670456, + "loss_ce": 0.0025119034107774496, + "loss_iou": 0.37109375, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 343198880, + "step": 3539 + }, + { + "epoch": 0.34610872115760655, + "grad_norm": 5.42388180365944, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 343296284, + "step": 3540 + }, + { + "epoch": 0.34610872115760655, + "loss": 0.11744910478591919, + "loss_ce": 0.006914432160556316, + "loss_iou": 0.39453125, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 343296284, + "step": 3540 + }, + { + "epoch": 0.34620649198279235, + "grad_norm": 6.126435454343136, + "learning_rate": 5e-05, + "loss": 0.1106, + "num_input_tokens_seen": 343393824, + "step": 3541 + }, + { + "epoch": 0.34620649198279235, + "loss": 0.09077252447605133, + "loss_ce": 0.0030802637338638306, + "loss_iou": 0.373046875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 343393824, + "step": 3541 + }, + { + "epoch": 0.3463042628079781, + "grad_norm": 5.2790442617273365, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 343489972, + "step": 3542 + }, + { + "epoch": 0.3463042628079781, + "loss": 0.06546716392040253, + "loss_ce": 0.009566589258611202, + "loss_iou": 0.3671875, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 343489972, + "step": 3542 + }, + { + "epoch": 0.34640203363316385, + "grad_norm": 5.353707432025432, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 343587544, + "step": 3543 + }, + { + "epoch": 0.34640203363316385, + "loss": 0.058300163596868515, + "loss_ce": 0.00552764069288969, + "loss_iou": 0.287109375, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 343587544, + "step": 3543 + }, + { + "epoch": 0.3464998044583496, + "grad_norm": 4.367747286296437, + "learning_rate": 5e-05, + "loss": 0.0451, + "num_input_tokens_seen": 343684556, + "step": 3544 + }, + { + "epoch": 0.3464998044583496, + "loss": 0.059861600399017334, + "loss_ce": 0.0029463183600455523, + "loss_iou": 0.296875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 343684556, + "step": 3544 + }, + { + "epoch": 0.3465975752835354, + "grad_norm": 3.228927353198435, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 343781244, + "step": 3545 + }, + { + "epoch": 0.3465975752835354, + "loss": 0.060233987867832184, + "loss_ce": 0.002662572544068098, + "loss_iou": 0.2177734375, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 343781244, + "step": 3545 + }, + { + "epoch": 0.34669534610872116, + "grad_norm": 2.2461918790849293, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 343878248, + "step": 3546 + }, + { + "epoch": 0.34669534610872116, + "loss": 0.09604081511497498, + "loss_ce": 0.005090801976621151, + "loss_iou": 0.279296875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 343878248, + "step": 3546 + }, + { + "epoch": 0.3467931169339069, + "grad_norm": 14.695904508836792, + "learning_rate": 5e-05, + "loss": 0.0881, + "num_input_tokens_seen": 343976216, + "step": 3547 + }, + { + "epoch": 0.3467931169339069, + "loss": 0.10172443091869354, + "loss_ce": 0.008554269559681416, + "loss_iou": 0.34765625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 343976216, + "step": 3547 + }, + { + "epoch": 0.3468908877590927, + "grad_norm": 15.391582773201296, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 344073488, + "step": 3548 + }, + { + "epoch": 0.3468908877590927, + "loss": 0.09082300215959549, + "loss_ce": 0.00439721392467618, + "loss_iou": 0.298828125, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 344073488, + "step": 3548 + }, + { + "epoch": 0.34698865858427846, + "grad_norm": 12.078858168891017, + "learning_rate": 5e-05, + "loss": 0.1112, + "num_input_tokens_seen": 344170236, + "step": 3549 + }, + { + "epoch": 0.34698865858427846, + "loss": 0.08712208271026611, + "loss_ce": 0.0023976543452590704, + "loss_iou": 0.228515625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 344170236, + "step": 3549 + }, + { + "epoch": 0.3470864294094642, + "grad_norm": 17.36681678350258, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 344266428, + "step": 3550 + }, + { + "epoch": 0.3470864294094642, + "loss": 0.06620907783508301, + "loss_ce": 0.005273099523037672, + "loss_iou": 0.3125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 344266428, + "step": 3550 + }, + { + "epoch": 0.34718420023464996, + "grad_norm": 16.690871374427424, + "learning_rate": 5e-05, + "loss": 0.1009, + "num_input_tokens_seen": 344363200, + "step": 3551 + }, + { + "epoch": 0.34718420023464996, + "loss": 0.0990733951330185, + "loss_ce": 0.009126648306846619, + "loss_iou": 0.25, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 344363200, + "step": 3551 + }, + { + "epoch": 0.34728197105983577, + "grad_norm": 9.429102670800834, + "learning_rate": 5e-05, + "loss": 0.0688, + "num_input_tokens_seen": 344459184, + "step": 3552 + }, + { + "epoch": 0.34728197105983577, + "loss": 0.06387029588222504, + "loss_ce": 0.005123952403664589, + "loss_iou": 0.267578125, + "loss_num": 0.01171875, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 344459184, + "step": 3552 + }, + { + "epoch": 0.3473797418850215, + "grad_norm": 4.613618702769787, + "learning_rate": 5e-05, + "loss": 0.1058, + "num_input_tokens_seen": 344556024, + "step": 3553 + }, + { + "epoch": 0.3473797418850215, + "loss": 0.1168988049030304, + "loss_ce": 0.004884037189185619, + "loss_iou": 0.330078125, + "loss_num": 0.0224609375, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 344556024, + "step": 3553 + }, + { + "epoch": 0.34747751271020727, + "grad_norm": 5.602073705006768, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 344651748, + "step": 3554 + }, + { + "epoch": 0.34747751271020727, + "loss": 0.07484833896160126, + "loss_ce": 0.0074998619966208935, + "loss_iou": 0.173828125, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 344651748, + "step": 3554 + }, + { + "epoch": 0.347575283535393, + "grad_norm": 14.657699562670578, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 344748056, + "step": 3555 + }, + { + "epoch": 0.347575283535393, + "loss": 0.08629891276359558, + "loss_ce": 0.0044355131685733795, + "loss_iou": 0.27734375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 344748056, + "step": 3555 + }, + { + "epoch": 0.3476730543605788, + "grad_norm": 4.1163849524587794, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 344845012, + "step": 3556 + }, + { + "epoch": 0.3476730543605788, + "loss": 0.06756363064050674, + "loss_ce": 0.005185696296393871, + "loss_iou": 0.302734375, + "loss_num": 0.012451171875, + "loss_xval": 0.0625, + "num_input_tokens_seen": 344845012, + "step": 3556 + }, + { + "epoch": 0.3477708251857646, + "grad_norm": 27.05130013646712, + "learning_rate": 5e-05, + "loss": 0.0662, + "num_input_tokens_seen": 344941924, + "step": 3557 + }, + { + "epoch": 0.3477708251857646, + "loss": 0.06729575991630554, + "loss_ce": 0.008068768307566643, + "loss_iou": 0.29296875, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 344941924, + "step": 3557 + }, + { + "epoch": 0.3478685960109503, + "grad_norm": 12.068288938315208, + "learning_rate": 5e-05, + "loss": 0.106, + "num_input_tokens_seen": 345038872, + "step": 3558 + }, + { + "epoch": 0.3478685960109503, + "loss": 0.06285054981708527, + "loss_ce": 0.006331996060907841, + "loss_iou": 0.35546875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 345038872, + "step": 3558 + }, + { + "epoch": 0.3479663668361361, + "grad_norm": 7.249940653104951, + "learning_rate": 5e-05, + "loss": 0.1022, + "num_input_tokens_seen": 345136724, + "step": 3559 + }, + { + "epoch": 0.3479663668361361, + "loss": 0.12171435356140137, + "loss_ce": 0.006144287995994091, + "loss_iou": 0.3515625, + "loss_num": 0.0230712890625, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 345136724, + "step": 3559 + }, + { + "epoch": 0.3480641376613219, + "grad_norm": 5.922461187170748, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 345233612, + "step": 3560 + }, + { + "epoch": 0.3480641376613219, + "loss": 0.07513521611690521, + "loss_ce": 0.0054941060952842236, + "loss_iou": 0.2734375, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 345233612, + "step": 3560 + }, + { + "epoch": 0.34816190848650763, + "grad_norm": 10.013603333436606, + "learning_rate": 5e-05, + "loss": 0.1475, + "num_input_tokens_seen": 345331412, + "step": 3561 + }, + { + "epoch": 0.34816190848650763, + "loss": 0.1393601894378662, + "loss_ce": 0.009191278368234634, + "loss_iou": 0.40625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 345331412, + "step": 3561 + }, + { + "epoch": 0.3482596793116934, + "grad_norm": 11.580089917970765, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 345428436, + "step": 3562 + }, + { + "epoch": 0.3482596793116934, + "loss": 0.08893933892250061, + "loss_ce": 0.005855230148881674, + "loss_iou": 0.3046875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 345428436, + "step": 3562 + }, + { + "epoch": 0.34835745013687913, + "grad_norm": 8.54713029118798, + "learning_rate": 5e-05, + "loss": 0.1172, + "num_input_tokens_seen": 345525788, + "step": 3563 + }, + { + "epoch": 0.34835745013687913, + "loss": 0.10850973427295685, + "loss_ce": 0.0032088253647089005, + "loss_iou": 0.34765625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 345525788, + "step": 3563 + }, + { + "epoch": 0.34845522096206494, + "grad_norm": 5.388130291719913, + "learning_rate": 5e-05, + "loss": 0.0956, + "num_input_tokens_seen": 345621764, + "step": 3564 + }, + { + "epoch": 0.34845522096206494, + "loss": 0.08294099569320679, + "loss_ce": 0.006883557885885239, + "loss_iou": 0.3046875, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 345621764, + "step": 3564 + }, + { + "epoch": 0.3485529917872507, + "grad_norm": 17.750877392853646, + "learning_rate": 5e-05, + "loss": 0.0966, + "num_input_tokens_seen": 345718528, + "step": 3565 + }, + { + "epoch": 0.3485529917872507, + "loss": 0.1076495498418808, + "loss_ce": 0.010176405310630798, + "loss_iou": 0.255859375, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 345718528, + "step": 3565 + }, + { + "epoch": 0.34865076261243644, + "grad_norm": 7.684655502504257, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 345815556, + "step": 3566 + }, + { + "epoch": 0.34865076261243644, + "loss": 0.11207529902458191, + "loss_ce": 0.009322612546384335, + "loss_iou": 0.251953125, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 345815556, + "step": 3566 + }, + { + "epoch": 0.3487485334376222, + "grad_norm": 2.2211239864806083, + "learning_rate": 5e-05, + "loss": 0.0633, + "num_input_tokens_seen": 345912384, + "step": 3567 + }, + { + "epoch": 0.3487485334376222, + "loss": 0.06705262511968613, + "loss_ce": 0.006719373632222414, + "loss_iou": 0.244140625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 345912384, + "step": 3567 + }, + { + "epoch": 0.348846304262808, + "grad_norm": 1.913666141010602, + "learning_rate": 5e-05, + "loss": 0.0414, + "num_input_tokens_seen": 346008120, + "step": 3568 + }, + { + "epoch": 0.348846304262808, + "loss": 0.04501333087682724, + "loss_ce": 0.006492515094578266, + "loss_iou": 0.25390625, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 346008120, + "step": 3568 + }, + { + "epoch": 0.34894407508799374, + "grad_norm": 12.280324137127822, + "learning_rate": 5e-05, + "loss": 0.1016, + "num_input_tokens_seen": 346105620, + "step": 3569 + }, + { + "epoch": 0.34894407508799374, + "loss": 0.10746486485004425, + "loss_ce": 0.006054954137653112, + "loss_iou": 0.35546875, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 346105620, + "step": 3569 + }, + { + "epoch": 0.3490418459131795, + "grad_norm": 14.27213529854587, + "learning_rate": 5e-05, + "loss": 0.1054, + "num_input_tokens_seen": 346203656, + "step": 3570 + }, + { + "epoch": 0.3490418459131795, + "loss": 0.11322655528783798, + "loss_ce": 0.003271722234785557, + "loss_iou": 0.474609375, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 346203656, + "step": 3570 + }, + { + "epoch": 0.3491396167383653, + "grad_norm": 33.45576836974017, + "learning_rate": 5e-05, + "loss": 0.0951, + "num_input_tokens_seen": 346301080, + "step": 3571 + }, + { + "epoch": 0.3491396167383653, + "loss": 0.10340270400047302, + "loss_ce": 0.009858695790171623, + "loss_iou": 0.4140625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 346301080, + "step": 3571 + }, + { + "epoch": 0.34923738756355105, + "grad_norm": 27.583277396902126, + "learning_rate": 5e-05, + "loss": 0.0819, + "num_input_tokens_seen": 346397452, + "step": 3572 + }, + { + "epoch": 0.34923738756355105, + "loss": 0.105223149061203, + "loss_ce": 0.005888435989618301, + "loss_iou": 0.3203125, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 346397452, + "step": 3572 + }, + { + "epoch": 0.3493351583887368, + "grad_norm": 7.562772444850915, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 346494584, + "step": 3573 + }, + { + "epoch": 0.3493351583887368, + "loss": 0.07371407747268677, + "loss_ce": 0.006300753448158503, + "loss_iou": 0.271484375, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 346494584, + "step": 3573 + }, + { + "epoch": 0.34943292921392255, + "grad_norm": 5.151612095852065, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 346590212, + "step": 3574 + }, + { + "epoch": 0.34943292921392255, + "loss": 0.06474929302930832, + "loss_ce": 0.0030427537858486176, + "loss_iou": 0.224609375, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 346590212, + "step": 3574 + }, + { + "epoch": 0.34953070003910836, + "grad_norm": 5.353136086324406, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 346686964, + "step": 3575 + }, + { + "epoch": 0.34953070003910836, + "loss": 0.097646065056324, + "loss_ce": 0.005635567009449005, + "loss_iou": 0.373046875, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 346686964, + "step": 3575 + }, + { + "epoch": 0.3496284708642941, + "grad_norm": 15.55136937014516, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 346784496, + "step": 3576 + }, + { + "epoch": 0.3496284708642941, + "loss": 0.09296193718910217, + "loss_ce": 0.0052696717903018, + "loss_iou": 0.3125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 346784496, + "step": 3576 + }, + { + "epoch": 0.34972624168947986, + "grad_norm": 35.654803115355335, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 346881636, + "step": 3577 + }, + { + "epoch": 0.34972624168947986, + "loss": 0.05722709745168686, + "loss_ce": 0.003027877537533641, + "loss_iou": 0.384765625, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 346881636, + "step": 3577 + }, + { + "epoch": 0.3498240125146656, + "grad_norm": 9.868734923798192, + "learning_rate": 5e-05, + "loss": 0.1007, + "num_input_tokens_seen": 346977920, + "step": 3578 + }, + { + "epoch": 0.3498240125146656, + "loss": 0.08181211352348328, + "loss_ce": 0.011652199551463127, + "loss_iou": 0.294921875, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 346977920, + "step": 3578 + }, + { + "epoch": 0.3499217833398514, + "grad_norm": 9.081271543010175, + "learning_rate": 5e-05, + "loss": 0.0951, + "num_input_tokens_seen": 347075972, + "step": 3579 + }, + { + "epoch": 0.3499217833398514, + "loss": 0.1185750663280487, + "loss_ce": 0.016066525131464005, + "loss_iou": 0.33203125, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 347075972, + "step": 3579 + }, + { + "epoch": 0.35001955416503716, + "grad_norm": 8.866427918864611, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 347173600, + "step": 3580 + }, + { + "epoch": 0.35001955416503716, + "loss": 0.06811344623565674, + "loss_ce": 0.006155128590762615, + "loss_iou": 0.2734375, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 347173600, + "step": 3580 + }, + { + "epoch": 0.3501173249902229, + "grad_norm": 13.06688121817021, + "learning_rate": 5e-05, + "loss": 0.4828, + "num_input_tokens_seen": 347270672, + "step": 3581 + }, + { + "epoch": 0.3501173249902229, + "loss": 0.421314537525177, + "loss_ce": 0.3015483021736145, + "loss_iou": 0.33984375, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 347270672, + "step": 3581 + }, + { + "epoch": 0.35021509581540866, + "grad_norm": 33.67053557064953, + "learning_rate": 5e-05, + "loss": 0.261, + "num_input_tokens_seen": 347367244, + "step": 3582 + }, + { + "epoch": 0.35021509581540866, + "loss": 0.23590520024299622, + "loss_ce": 0.15390446782112122, + "loss_iou": 0.32421875, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 347367244, + "step": 3582 + }, + { + "epoch": 0.35031286664059447, + "grad_norm": 46.80380485883818, + "learning_rate": 5e-05, + "loss": 0.4143, + "num_input_tokens_seen": 347464760, + "step": 3583 + }, + { + "epoch": 0.35031286664059447, + "loss": 0.345334529876709, + "loss_ce": 0.272794246673584, + "loss_iou": 0.40625, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 347464760, + "step": 3583 + }, + { + "epoch": 0.3504106374657802, + "grad_norm": 15.610326429243232, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 347562088, + "step": 3584 + }, + { + "epoch": 0.3504106374657802, + "loss": 0.071723572909832, + "loss_ce": 0.0107341930270195, + "loss_iou": 0.470703125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 347562088, + "step": 3584 + }, + { + "epoch": 0.35050840829096597, + "grad_norm": 11.927052140805797, + "learning_rate": 5e-05, + "loss": 0.0851, + "num_input_tokens_seen": 347659288, + "step": 3585 + }, + { + "epoch": 0.35050840829096597, + "loss": 0.08027070760726929, + "loss_ce": 0.003381673712283373, + "loss_iou": 0.427734375, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 347659288, + "step": 3585 + }, + { + "epoch": 0.3506061791161517, + "grad_norm": 23.80758452137746, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 347757036, + "step": 3586 + }, + { + "epoch": 0.3506061791161517, + "loss": 0.06136079132556915, + "loss_ce": 0.003804638050496578, + "loss_iou": 0.39453125, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 347757036, + "step": 3586 + }, + { + "epoch": 0.3507039499413375, + "grad_norm": 5.307818152072025, + "learning_rate": 5e-05, + "loss": 0.1309, + "num_input_tokens_seen": 347853864, + "step": 3587 + }, + { + "epoch": 0.3507039499413375, + "loss": 0.12547817826271057, + "loss_ce": 0.0031637169886380434, + "loss_iou": 0.357421875, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 347853864, + "step": 3587 + }, + { + "epoch": 0.3508017207665233, + "grad_norm": 19.707388114569124, + "learning_rate": 5e-05, + "loss": 0.1026, + "num_input_tokens_seen": 347950288, + "step": 3588 + }, + { + "epoch": 0.3508017207665233, + "loss": 0.13046421110630035, + "loss_ce": 0.007081641815602779, + "loss_iou": 0.34375, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 347950288, + "step": 3588 + }, + { + "epoch": 0.350899491591709, + "grad_norm": 15.437061900440431, + "learning_rate": 5e-05, + "loss": 0.1271, + "num_input_tokens_seen": 348046916, + "step": 3589 + }, + { + "epoch": 0.350899491591709, + "loss": 0.13043782114982605, + "loss_ce": 0.006444910075515509, + "loss_iou": 0.318359375, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 348046916, + "step": 3589 + }, + { + "epoch": 0.3509972624168948, + "grad_norm": 21.643870271704856, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 348143200, + "step": 3590 + }, + { + "epoch": 0.3509972624168948, + "loss": 0.09175777435302734, + "loss_ce": 0.012167933396995068, + "loss_iou": 0.265625, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 348143200, + "step": 3590 + }, + { + "epoch": 0.3510950332420806, + "grad_norm": 14.480029548292938, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 348239808, + "step": 3591 + }, + { + "epoch": 0.3510950332420806, + "loss": 0.09343621134757996, + "loss_ce": 0.003729794407263398, + "loss_iou": 0.275390625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 348239808, + "step": 3591 + }, + { + "epoch": 0.35119280406726633, + "grad_norm": 35.6873000143535, + "learning_rate": 5e-05, + "loss": 0.1246, + "num_input_tokens_seen": 348337360, + "step": 3592 + }, + { + "epoch": 0.35119280406726633, + "loss": 0.08632253110408783, + "loss_ce": 0.0030095400288701057, + "loss_iou": 0.42578125, + "loss_num": 0.0166015625, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 348337360, + "step": 3592 + }, + { + "epoch": 0.3512905748924521, + "grad_norm": 17.838270631949413, + "learning_rate": 5e-05, + "loss": 0.0854, + "num_input_tokens_seen": 348433880, + "step": 3593 + }, + { + "epoch": 0.3512905748924521, + "loss": 0.07643716037273407, + "loss_ce": 0.004507226869463921, + "loss_iou": 0.421875, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 348433880, + "step": 3593 + }, + { + "epoch": 0.3513883457176379, + "grad_norm": 3.2872162446605104, + "learning_rate": 5e-05, + "loss": 0.0932, + "num_input_tokens_seen": 348530648, + "step": 3594 + }, + { + "epoch": 0.3513883457176379, + "loss": 0.06792338937520981, + "loss_ce": 0.0035312986001372337, + "loss_iou": 0.291015625, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 348530648, + "step": 3594 + }, + { + "epoch": 0.35148611654282363, + "grad_norm": 16.196589464194993, + "learning_rate": 5e-05, + "loss": 0.1089, + "num_input_tokens_seen": 348627572, + "step": 3595 + }, + { + "epoch": 0.35148611654282363, + "loss": 0.14270251989364624, + "loss_ce": 0.005762523040175438, + "loss_iou": 0.267578125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 348627572, + "step": 3595 + }, + { + "epoch": 0.3515838873680094, + "grad_norm": 21.441866339539214, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 348724852, + "step": 3596 + }, + { + "epoch": 0.3515838873680094, + "loss": 0.07624640315771103, + "loss_ce": 0.006178045179694891, + "loss_iou": 0.3203125, + "loss_num": 0.01397705078125, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 348724852, + "step": 3596 + }, + { + "epoch": 0.35168165819319513, + "grad_norm": 24.95420662007724, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 348821492, + "step": 3597 + }, + { + "epoch": 0.35168165819319513, + "loss": 0.08150406181812286, + "loss_ce": 0.004836278967559338, + "loss_iou": 0.369140625, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 348821492, + "step": 3597 + }, + { + "epoch": 0.35177942901838094, + "grad_norm": 5.397096786962579, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 348918544, + "step": 3598 + }, + { + "epoch": 0.35177942901838094, + "loss": 0.07836989313364029, + "loss_ce": 0.002167501486837864, + "loss_iou": 0.279296875, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 348918544, + "step": 3598 + }, + { + "epoch": 0.3518771998435667, + "grad_norm": 3.379085684616552, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 349014796, + "step": 3599 + }, + { + "epoch": 0.3518771998435667, + "loss": 0.07667829841375351, + "loss_ce": 0.004015941638499498, + "loss_iou": 0.28125, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 349014796, + "step": 3599 + }, + { + "epoch": 0.35197497066875244, + "grad_norm": 9.630214515996009, + "learning_rate": 5e-05, + "loss": 0.0534, + "num_input_tokens_seen": 349111252, + "step": 3600 + }, + { + "epoch": 0.35197497066875244, + "loss": 0.04106076806783676, + "loss_ce": 0.0022233352065086365, + "loss_iou": 0.2099609375, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 349111252, + "step": 3600 + }, + { + "epoch": 0.3520727414939382, + "grad_norm": 11.745067815052922, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 349208712, + "step": 3601 + }, + { + "epoch": 0.3520727414939382, + "loss": 0.08120112121105194, + "loss_ce": 0.00553278811275959, + "loss_iou": 0.28125, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 349208712, + "step": 3601 + }, + { + "epoch": 0.352170512319124, + "grad_norm": 10.836811027636346, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 349306776, + "step": 3602 + }, + { + "epoch": 0.352170512319124, + "loss": 0.09165913611650467, + "loss_ce": 0.005569045431911945, + "loss_iou": 0.298828125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 349306776, + "step": 3602 + }, + { + "epoch": 0.35226828314430975, + "grad_norm": 5.669939651111403, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 349404296, + "step": 3603 + }, + { + "epoch": 0.35226828314430975, + "loss": 0.05687297508120537, + "loss_ce": 0.01137126237154007, + "loss_iou": 0.328125, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 349404296, + "step": 3603 + }, + { + "epoch": 0.3523660539694955, + "grad_norm": 29.778583048210944, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 349501508, + "step": 3604 + }, + { + "epoch": 0.3523660539694955, + "loss": 0.11374472081661224, + "loss_ce": 0.006689051166176796, + "loss_iou": 0.2890625, + "loss_num": 0.021484375, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 349501508, + "step": 3604 + }, + { + "epoch": 0.35246382479468125, + "grad_norm": 2.903329072292981, + "learning_rate": 5e-05, + "loss": 0.1122, + "num_input_tokens_seen": 349597560, + "step": 3605 + }, + { + "epoch": 0.35246382479468125, + "loss": 0.11745759844779968, + "loss_ce": 0.0076133920811116695, + "loss_iou": 0.197265625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 349597560, + "step": 3605 + }, + { + "epoch": 0.35256159561986705, + "grad_norm": 11.91372988407376, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 349694384, + "step": 3606 + }, + { + "epoch": 0.35256159561986705, + "loss": 0.07798229902982712, + "loss_ce": 0.0025733637157827616, + "loss_iou": 0.326171875, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 349694384, + "step": 3606 + }, + { + "epoch": 0.3526593664450528, + "grad_norm": 3.5843514183584677, + "learning_rate": 5e-05, + "loss": 0.0773, + "num_input_tokens_seen": 349791640, + "step": 3607 + }, + { + "epoch": 0.3526593664450528, + "loss": 0.04970866069197655, + "loss_ce": 0.008235270157456398, + "loss_iou": 0.30859375, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 349791640, + "step": 3607 + }, + { + "epoch": 0.35275713727023855, + "grad_norm": 7.12031503437252, + "learning_rate": 5e-05, + "loss": 0.0981, + "num_input_tokens_seen": 349888020, + "step": 3608 + }, + { + "epoch": 0.35275713727023855, + "loss": 0.13316279649734497, + "loss_ce": 0.005515404511243105, + "loss_iou": 0.1826171875, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 349888020, + "step": 3608 + }, + { + "epoch": 0.3528549080954243, + "grad_norm": 8.281994483743624, + "learning_rate": 5e-05, + "loss": 0.083, + "num_input_tokens_seen": 349984440, + "step": 3609 + }, + { + "epoch": 0.3528549080954243, + "loss": 0.07918192446231842, + "loss_ce": 0.003772992640733719, + "loss_iou": 0.291015625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 349984440, + "step": 3609 + }, + { + "epoch": 0.3529526789206101, + "grad_norm": 11.340806552099012, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 350081084, + "step": 3610 + }, + { + "epoch": 0.3529526789206101, + "loss": 0.08824396133422852, + "loss_ce": 0.006380555219948292, + "loss_iou": 0.216796875, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 350081084, + "step": 3610 + }, + { + "epoch": 0.35305044974579586, + "grad_norm": 4.193890022207742, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 350178136, + "step": 3611 + }, + { + "epoch": 0.35305044974579586, + "loss": 0.07658828049898148, + "loss_ce": 0.004208212718367577, + "loss_iou": 0.2353515625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 350178136, + "step": 3611 + }, + { + "epoch": 0.3531482205709816, + "grad_norm": 17.10596494741269, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 350274952, + "step": 3612 + }, + { + "epoch": 0.3531482205709816, + "loss": 0.06469416618347168, + "loss_ce": 0.006039380561560392, + "loss_iou": 0.314453125, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 350274952, + "step": 3612 + }, + { + "epoch": 0.35324599139616736, + "grad_norm": 9.849781737839075, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 350371920, + "step": 3613 + }, + { + "epoch": 0.35324599139616736, + "loss": 0.07702367752790451, + "loss_ce": 0.007340601645410061, + "loss_iou": 0.26171875, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 350371920, + "step": 3613 + }, + { + "epoch": 0.35334376222135316, + "grad_norm": 8.647348958060935, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 350468980, + "step": 3614 + }, + { + "epoch": 0.35334376222135316, + "loss": 0.09916732460260391, + "loss_ce": 0.004990076646208763, + "loss_iou": 0.326171875, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 350468980, + "step": 3614 + }, + { + "epoch": 0.3534415330465389, + "grad_norm": 23.014096981682645, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 350566400, + "step": 3615 + }, + { + "epoch": 0.3534415330465389, + "loss": 0.055917467921972275, + "loss_ce": 0.00392314326018095, + "loss_iou": 0.3671875, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 350566400, + "step": 3615 + }, + { + "epoch": 0.35353930387172466, + "grad_norm": 21.794272706189165, + "learning_rate": 5e-05, + "loss": 0.1098, + "num_input_tokens_seen": 350664324, + "step": 3616 + }, + { + "epoch": 0.35353930387172466, + "loss": 0.10006597638130188, + "loss_ce": 0.004942678846418858, + "loss_iou": 0.388671875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 350664324, + "step": 3616 + }, + { + "epoch": 0.35363707469691047, + "grad_norm": 11.56263632784165, + "learning_rate": 5e-05, + "loss": 0.1228, + "num_input_tokens_seen": 350761668, + "step": 3617 + }, + { + "epoch": 0.35363707469691047, + "loss": 0.18778063356876373, + "loss_ce": 0.008520379662513733, + "loss_iou": 0.32421875, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 350761668, + "step": 3617 + }, + { + "epoch": 0.3537348455220962, + "grad_norm": 5.310986385724185, + "learning_rate": 5e-05, + "loss": 0.0591, + "num_input_tokens_seen": 350858048, + "step": 3618 + }, + { + "epoch": 0.3537348455220962, + "loss": 0.06675925850868225, + "loss_ce": 0.005815649870783091, + "loss_iou": 0.197265625, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 350858048, + "step": 3618 + }, + { + "epoch": 0.35383261634728197, + "grad_norm": 6.743820210698422, + "learning_rate": 5e-05, + "loss": 0.0727, + "num_input_tokens_seen": 350954668, + "step": 3619 + }, + { + "epoch": 0.35383261634728197, + "loss": 0.05390126258134842, + "loss_ce": 0.00541645847260952, + "loss_iou": 0.275390625, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 350954668, + "step": 3619 + }, + { + "epoch": 0.3539303871724677, + "grad_norm": 6.859792606382458, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 351052416, + "step": 3620 + }, + { + "epoch": 0.3539303871724677, + "loss": 0.10372862964868546, + "loss_ce": 0.0038903665263205767, + "loss_iou": 0.291015625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 351052416, + "step": 3620 + }, + { + "epoch": 0.3540281579976535, + "grad_norm": 11.092502555294807, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 351148996, + "step": 3621 + }, + { + "epoch": 0.3540281579976535, + "loss": 0.07563924044370651, + "loss_ce": 0.006433001719415188, + "loss_iou": 0.296875, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 351148996, + "step": 3621 + }, + { + "epoch": 0.3541259288228393, + "grad_norm": 18.878059661051196, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 351245432, + "step": 3622 + }, + { + "epoch": 0.3541259288228393, + "loss": 0.07616633176803589, + "loss_ce": 0.00448816642165184, + "loss_iou": 0.2041015625, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 351245432, + "step": 3622 + }, + { + "epoch": 0.354223699648025, + "grad_norm": 3.9902174126135264, + "learning_rate": 5e-05, + "loss": 0.0866, + "num_input_tokens_seen": 351341852, + "step": 3623 + }, + { + "epoch": 0.354223699648025, + "loss": 0.08395210653543472, + "loss_ce": 0.003935015760362148, + "loss_iou": 0.341796875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 351341852, + "step": 3623 + }, + { + "epoch": 0.3543214704732108, + "grad_norm": 4.823798212804981, + "learning_rate": 5e-05, + "loss": 0.1068, + "num_input_tokens_seen": 351438760, + "step": 3624 + }, + { + "epoch": 0.3543214704732108, + "loss": 0.09293660521507263, + "loss_ce": 0.007166951894760132, + "loss_iou": 0.298828125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 351438760, + "step": 3624 + }, + { + "epoch": 0.3544192412983966, + "grad_norm": 19.857162815021592, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 351535784, + "step": 3625 + }, + { + "epoch": 0.3544192412983966, + "loss": 0.07001572847366333, + "loss_ce": 0.0031479012686759233, + "loss_iou": 0.259765625, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 351535784, + "step": 3625 + }, + { + "epoch": 0.35451701212358233, + "grad_norm": 10.352897072205868, + "learning_rate": 5e-05, + "loss": 0.1241, + "num_input_tokens_seen": 351632588, + "step": 3626 + }, + { + "epoch": 0.35451701212358233, + "loss": 0.1835247278213501, + "loss_ce": 0.006568551994860172, + "loss_iou": 0.28125, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 351632588, + "step": 3626 + }, + { + "epoch": 0.3546147829487681, + "grad_norm": 11.415271092995296, + "learning_rate": 5e-05, + "loss": 0.0726, + "num_input_tokens_seen": 351729220, + "step": 3627 + }, + { + "epoch": 0.3546147829487681, + "loss": 0.07876750081777573, + "loss_ce": 0.002893173135817051, + "loss_iou": 0.263671875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 351729220, + "step": 3627 + }, + { + "epoch": 0.35471255377395383, + "grad_norm": 4.005540644423291, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 351826432, + "step": 3628 + }, + { + "epoch": 0.35471255377395383, + "loss": 0.06380835175514221, + "loss_ce": 0.0021018064580857754, + "loss_iou": 0.3125, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 351826432, + "step": 3628 + }, + { + "epoch": 0.35481032459913964, + "grad_norm": 13.547257224578006, + "learning_rate": 5e-05, + "loss": 0.0695, + "num_input_tokens_seen": 351924712, + "step": 3629 + }, + { + "epoch": 0.35481032459913964, + "loss": 0.07142822444438934, + "loss_ce": 0.002557681640610099, + "loss_iou": 0.419921875, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 351924712, + "step": 3629 + }, + { + "epoch": 0.3549080954243254, + "grad_norm": 18.029134429750087, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 352021980, + "step": 3630 + }, + { + "epoch": 0.3549080954243254, + "loss": 0.07193852961063385, + "loss_ce": 0.008522998541593552, + "loss_iou": 0.34765625, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 352021980, + "step": 3630 + }, + { + "epoch": 0.35500586624951114, + "grad_norm": 12.090570470773295, + "learning_rate": 5e-05, + "loss": 0.0905, + "num_input_tokens_seen": 352119524, + "step": 3631 + }, + { + "epoch": 0.35500586624951114, + "loss": 0.0909666121006012, + "loss_ce": 0.011132622137665749, + "loss_iou": 0.3828125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 352119524, + "step": 3631 + }, + { + "epoch": 0.3551036370746969, + "grad_norm": 15.496471910992108, + "learning_rate": 5e-05, + "loss": 0.115, + "num_input_tokens_seen": 352217124, + "step": 3632 + }, + { + "epoch": 0.3551036370746969, + "loss": 0.14111235737800598, + "loss_ce": 0.005827937740832567, + "loss_iou": 0.359375, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 352217124, + "step": 3632 + }, + { + "epoch": 0.3552014078998827, + "grad_norm": 10.748285307102552, + "learning_rate": 5e-05, + "loss": 0.1008, + "num_input_tokens_seen": 352314420, + "step": 3633 + }, + { + "epoch": 0.3552014078998827, + "loss": 0.07677951455116272, + "loss_ce": 0.003499177750200033, + "loss_iou": 0.384765625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 352314420, + "step": 3633 + }, + { + "epoch": 0.35529917872506844, + "grad_norm": 6.488250124386291, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 352412376, + "step": 3634 + }, + { + "epoch": 0.35529917872506844, + "loss": 0.0725947916507721, + "loss_ce": 0.00621906528249383, + "loss_iou": 0.349609375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 352412376, + "step": 3634 + }, + { + "epoch": 0.3553969495502542, + "grad_norm": 11.406766624019292, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 352509396, + "step": 3635 + }, + { + "epoch": 0.3553969495502542, + "loss": 0.07568976283073425, + "loss_ce": 0.008535838685929775, + "loss_iou": 0.279296875, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 352509396, + "step": 3635 + }, + { + "epoch": 0.35549472037543994, + "grad_norm": 9.747666383317664, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 352606464, + "step": 3636 + }, + { + "epoch": 0.35549472037543994, + "loss": 0.06563682854175568, + "loss_ce": 0.005578235723078251, + "loss_iou": 0.314453125, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 352606464, + "step": 3636 + }, + { + "epoch": 0.35559249120062575, + "grad_norm": 3.9729201350947516, + "learning_rate": 5e-05, + "loss": 0.0906, + "num_input_tokens_seen": 352703876, + "step": 3637 + }, + { + "epoch": 0.35559249120062575, + "loss": 0.06092148646712303, + "loss_ce": 0.00217514974065125, + "loss_iou": 0.390625, + "loss_num": 0.01171875, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 352703876, + "step": 3637 + }, + { + "epoch": 0.3556902620258115, + "grad_norm": 13.696975177404616, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 352801292, + "step": 3638 + }, + { + "epoch": 0.3556902620258115, + "loss": 0.06903340667486191, + "loss_ce": 0.005862017627805471, + "loss_iou": 0.365234375, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 352801292, + "step": 3638 + }, + { + "epoch": 0.35578803285099725, + "grad_norm": 45.92700393895133, + "learning_rate": 5e-05, + "loss": 0.1128, + "num_input_tokens_seen": 352898248, + "step": 3639 + }, + { + "epoch": 0.35578803285099725, + "loss": 0.11606992781162262, + "loss_ce": 0.004253519233316183, + "loss_iou": 0.4375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 352898248, + "step": 3639 + }, + { + "epoch": 0.35588580367618305, + "grad_norm": 32.19370650233936, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 352995112, + "step": 3640 + }, + { + "epoch": 0.35588580367618305, + "loss": 0.0580412819981575, + "loss_ce": 0.008824056014418602, + "loss_iou": 0.2490234375, + "loss_num": 0.00982666015625, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 352995112, + "step": 3640 + }, + { + "epoch": 0.3559835745013688, + "grad_norm": 3.5434914559510413, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 353092004, + "step": 3641 + }, + { + "epoch": 0.3559835745013688, + "loss": 0.08287442475557327, + "loss_ce": 0.004169590305536985, + "loss_iou": 0.337890625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 353092004, + "step": 3641 + }, + { + "epoch": 0.35608134532655455, + "grad_norm": 16.606977985907214, + "learning_rate": 5e-05, + "loss": 0.0705, + "num_input_tokens_seen": 353188776, + "step": 3642 + }, + { + "epoch": 0.35608134532655455, + "loss": 0.06493046134710312, + "loss_ce": 0.00876285694539547, + "loss_iou": 0.30859375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 353188776, + "step": 3642 + }, + { + "epoch": 0.3561791161517403, + "grad_norm": 7.797053857991116, + "learning_rate": 5e-05, + "loss": 0.0633, + "num_input_tokens_seen": 353285788, + "step": 3643 + }, + { + "epoch": 0.3561791161517403, + "loss": 0.08531166613101959, + "loss_ce": 0.0030944468453526497, + "loss_iou": 0.37109375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 353285788, + "step": 3643 + }, + { + "epoch": 0.3562768869769261, + "grad_norm": 11.02734927303061, + "learning_rate": 5e-05, + "loss": 0.1269, + "num_input_tokens_seen": 353382056, + "step": 3644 + }, + { + "epoch": 0.3562768869769261, + "loss": 0.12912146747112274, + "loss_ce": 0.006318738684058189, + "loss_iou": 0.296875, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 353382056, + "step": 3644 + }, + { + "epoch": 0.35637465780211186, + "grad_norm": 17.336608940143496, + "learning_rate": 5e-05, + "loss": 0.1009, + "num_input_tokens_seen": 353479124, + "step": 3645 + }, + { + "epoch": 0.35637465780211186, + "loss": 0.12972089648246765, + "loss_ce": 0.0050756679847836494, + "loss_iou": 0.248046875, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 353479124, + "step": 3645 + }, + { + "epoch": 0.3564724286272976, + "grad_norm": 4.0800089382534495, + "learning_rate": 5e-05, + "loss": 0.062, + "num_input_tokens_seen": 353575984, + "step": 3646 + }, + { + "epoch": 0.3564724286272976, + "loss": 0.060149915516376495, + "loss_ce": 0.003593211993575096, + "loss_iou": 0.259765625, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 353575984, + "step": 3646 + }, + { + "epoch": 0.35657019945248336, + "grad_norm": 10.594672308572282, + "learning_rate": 5e-05, + "loss": 0.1387, + "num_input_tokens_seen": 353672676, + "step": 3647 + }, + { + "epoch": 0.35657019945248336, + "loss": 0.12772220373153687, + "loss_ce": 0.005262783262878656, + "loss_iou": 0.263671875, + "loss_num": 0.0244140625, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 353672676, + "step": 3647 + }, + { + "epoch": 0.35666797027766917, + "grad_norm": 16.27631254244885, + "learning_rate": 5e-05, + "loss": 0.0872, + "num_input_tokens_seen": 353769140, + "step": 3648 + }, + { + "epoch": 0.35666797027766917, + "loss": 0.07216576486825943, + "loss_ce": 0.00685433205217123, + "loss_iou": 0.166015625, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 353769140, + "step": 3648 + }, + { + "epoch": 0.3567657411028549, + "grad_norm": 12.746887072627676, + "learning_rate": 5e-05, + "loss": 0.0516, + "num_input_tokens_seen": 353866284, + "step": 3649 + }, + { + "epoch": 0.3567657411028549, + "loss": 0.055837564170360565, + "loss_ce": 0.004156045615673065, + "loss_iou": 0.20703125, + "loss_num": 0.01031494140625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 353866284, + "step": 3649 + }, + { + "epoch": 0.35686351192804067, + "grad_norm": 6.149233191242357, + "learning_rate": 5e-05, + "loss": 0.1165, + "num_input_tokens_seen": 353962684, + "step": 3650 + }, + { + "epoch": 0.35686351192804067, + "loss": 0.11332883685827255, + "loss_ce": 0.006090068724006414, + "loss_iou": 0.2236328125, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 353962684, + "step": 3650 + }, + { + "epoch": 0.3569612827532264, + "grad_norm": 16.077185000644985, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 354059848, + "step": 3651 + }, + { + "epoch": 0.3569612827532264, + "loss": 0.06081395223736763, + "loss_ce": 0.005577134899795055, + "loss_iou": 0.23828125, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 354059848, + "step": 3651 + }, + { + "epoch": 0.3570590535784122, + "grad_norm": 22.574808076425285, + "learning_rate": 5e-05, + "loss": 0.0906, + "num_input_tokens_seen": 354157324, + "step": 3652 + }, + { + "epoch": 0.3570590535784122, + "loss": 0.08616114407777786, + "loss_ce": 0.005548960529267788, + "loss_iou": 0.3671875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 354157324, + "step": 3652 + }, + { + "epoch": 0.35715682440359797, + "grad_norm": 16.992835714400787, + "learning_rate": 5e-05, + "loss": 0.1022, + "num_input_tokens_seen": 354254000, + "step": 3653 + }, + { + "epoch": 0.35715682440359797, + "loss": 0.1151183545589447, + "loss_ce": 0.003988596610724926, + "loss_iou": 0.341796875, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 354254000, + "step": 3653 + }, + { + "epoch": 0.3572545952287837, + "grad_norm": 3.4673087937684612, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 354351280, + "step": 3654 + }, + { + "epoch": 0.3572545952287837, + "loss": 0.0762501209974289, + "loss_ce": 0.006654776632785797, + "loss_iou": 0.275390625, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 354351280, + "step": 3654 + }, + { + "epoch": 0.3573523660539695, + "grad_norm": 4.281927967903766, + "learning_rate": 5e-05, + "loss": 0.1117, + "num_input_tokens_seen": 354448232, + "step": 3655 + }, + { + "epoch": 0.3573523660539695, + "loss": 0.14252518117427826, + "loss_ce": 0.00849197618663311, + "loss_iou": 0.326171875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 354448232, + "step": 3655 + }, + { + "epoch": 0.3574501368791553, + "grad_norm": 7.267791746253437, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 354545140, + "step": 3656 + }, + { + "epoch": 0.3574501368791553, + "loss": 0.05069546401500702, + "loss_ce": 0.0024242864456027746, + "loss_iou": 0.244140625, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 354545140, + "step": 3656 + }, + { + "epoch": 0.35754790770434103, + "grad_norm": 3.04177305157111, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 354643264, + "step": 3657 + }, + { + "epoch": 0.35754790770434103, + "loss": 0.08488242328166962, + "loss_ce": 0.006147066131234169, + "loss_iou": 0.4140625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 354643264, + "step": 3657 + }, + { + "epoch": 0.3576456785295268, + "grad_norm": 3.3099637342044654, + "learning_rate": 5e-05, + "loss": 0.1015, + "num_input_tokens_seen": 354739880, + "step": 3658 + }, + { + "epoch": 0.3576456785295268, + "loss": 0.061029285192489624, + "loss_ce": 0.0072725736536085606, + "loss_iou": 0.265625, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 354739880, + "step": 3658 + }, + { + "epoch": 0.35774344935471253, + "grad_norm": 2.5108277372535595, + "learning_rate": 5e-05, + "loss": 0.0858, + "num_input_tokens_seen": 354837028, + "step": 3659 + }, + { + "epoch": 0.35774344935471253, + "loss": 0.07712775468826294, + "loss_ce": 0.004755318630486727, + "loss_iou": 0.39453125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 354837028, + "step": 3659 + }, + { + "epoch": 0.35784122017989833, + "grad_norm": 2.6251516821969325, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 354934008, + "step": 3660 + }, + { + "epoch": 0.35784122017989833, + "loss": 0.07497313618659973, + "loss_ce": 0.003829028457403183, + "loss_iou": 0.28515625, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 354934008, + "step": 3660 + }, + { + "epoch": 0.3579389910050841, + "grad_norm": 4.648530911763221, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 355031608, + "step": 3661 + }, + { + "epoch": 0.3579389910050841, + "loss": 0.08563636988401413, + "loss_ce": 0.0026285541243851185, + "loss_iou": 0.34375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 355031608, + "step": 3661 + }, + { + "epoch": 0.35803676183026983, + "grad_norm": 2.604706482142884, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 355127264, + "step": 3662 + }, + { + "epoch": 0.35803676183026983, + "loss": 0.06590857356786728, + "loss_ce": 0.004186774604022503, + "loss_iou": 0.171875, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 355127264, + "step": 3662 + }, + { + "epoch": 0.35813453265545564, + "grad_norm": 10.953296195954875, + "learning_rate": 5e-05, + "loss": 0.1119, + "num_input_tokens_seen": 355224412, + "step": 3663 + }, + { + "epoch": 0.35813453265545564, + "loss": 0.11004984378814697, + "loss_ce": 0.009051920846104622, + "loss_iou": 0.287109375, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 355224412, + "step": 3663 + }, + { + "epoch": 0.3582323034806414, + "grad_norm": 2.9653694864413285, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 355320620, + "step": 3664 + }, + { + "epoch": 0.3582323034806414, + "loss": 0.07002105563879013, + "loss_ce": 0.006239321082830429, + "loss_iou": 0.265625, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 355320620, + "step": 3664 + }, + { + "epoch": 0.35833007430582714, + "grad_norm": 10.269264971310175, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 355417452, + "step": 3665 + }, + { + "epoch": 0.35833007430582714, + "loss": 0.06426654011011124, + "loss_ce": 0.004818294662982225, + "loss_iou": 0.267578125, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 355417452, + "step": 3665 + }, + { + "epoch": 0.3584278451310129, + "grad_norm": 38.13148711024423, + "learning_rate": 5e-05, + "loss": 0.0871, + "num_input_tokens_seen": 355514232, + "step": 3666 + }, + { + "epoch": 0.3584278451310129, + "loss": 0.11068951338529587, + "loss_ce": 0.003633845830336213, + "loss_iou": 0.337890625, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 355514232, + "step": 3666 + }, + { + "epoch": 0.3585256159561987, + "grad_norm": 2.892476478534816, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 355611512, + "step": 3667 + }, + { + "epoch": 0.3585256159561987, + "loss": 0.11602345108985901, + "loss_ce": 0.008578691631555557, + "loss_iou": 0.28515625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 355611512, + "step": 3667 + }, + { + "epoch": 0.35862338678138445, + "grad_norm": 53.82155818187051, + "learning_rate": 5e-05, + "loss": 0.097, + "num_input_tokens_seen": 355708308, + "step": 3668 + }, + { + "epoch": 0.35862338678138445, + "loss": 0.09227895736694336, + "loss_ce": 0.007279874291270971, + "loss_iou": 0.291015625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 355708308, + "step": 3668 + }, + { + "epoch": 0.3587211576065702, + "grad_norm": 8.98669186603388, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 355805152, + "step": 3669 + }, + { + "epoch": 0.3587211576065702, + "loss": 0.1056772917509079, + "loss_ce": 0.007441206369549036, + "loss_iou": 0.53125, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 355805152, + "step": 3669 + }, + { + "epoch": 0.35881892843175595, + "grad_norm": 21.709083378726593, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 355901956, + "step": 3670 + }, + { + "epoch": 0.35881892843175595, + "loss": 0.12671257555484772, + "loss_ce": 0.0032384530641138554, + "loss_iou": 0.373046875, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 355901956, + "step": 3670 + }, + { + "epoch": 0.35891669925694175, + "grad_norm": 11.263554427772139, + "learning_rate": 5e-05, + "loss": 0.0922, + "num_input_tokens_seen": 355998336, + "step": 3671 + }, + { + "epoch": 0.35891669925694175, + "loss": 0.09158742427825928, + "loss_ce": 0.007706044241786003, + "loss_iou": 0.26171875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 355998336, + "step": 3671 + }, + { + "epoch": 0.3590144700821275, + "grad_norm": 7.28233668846287, + "learning_rate": 5e-05, + "loss": 0.1226, + "num_input_tokens_seen": 356095760, + "step": 3672 + }, + { + "epoch": 0.3590144700821275, + "loss": 0.09928470849990845, + "loss_ce": 0.006724888458848, + "loss_iou": 0.328125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 356095760, + "step": 3672 + }, + { + "epoch": 0.35911224090731325, + "grad_norm": 5.523589463722646, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 356193712, + "step": 3673 + }, + { + "epoch": 0.35911224090731325, + "loss": 0.10933808982372284, + "loss_ce": 0.003854083828628063, + "loss_iou": 0.34765625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 356193712, + "step": 3673 + }, + { + "epoch": 0.359210011732499, + "grad_norm": 17.154079136647823, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 356290384, + "step": 3674 + }, + { + "epoch": 0.359210011732499, + "loss": 0.10303233563899994, + "loss_ce": 0.0038502102252095938, + "loss_iou": 0.435546875, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 356290384, + "step": 3674 + }, + { + "epoch": 0.3593077825576848, + "grad_norm": 17.762999678838877, + "learning_rate": 5e-05, + "loss": 0.0933, + "num_input_tokens_seen": 356387660, + "step": 3675 + }, + { + "epoch": 0.3593077825576848, + "loss": 0.06547058373689651, + "loss_ce": 0.003779298160225153, + "loss_iou": 0.236328125, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 356387660, + "step": 3675 + }, + { + "epoch": 0.35940555338287056, + "grad_norm": 10.194730001918195, + "learning_rate": 5e-05, + "loss": 0.1304, + "num_input_tokens_seen": 356484332, + "step": 3676 + }, + { + "epoch": 0.35940555338287056, + "loss": 0.12350475788116455, + "loss_ce": 0.005752690136432648, + "loss_iou": 0.3359375, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 356484332, + "step": 3676 + }, + { + "epoch": 0.3595033242080563, + "grad_norm": 4.949384724905499, + "learning_rate": 5e-05, + "loss": 0.0972, + "num_input_tokens_seen": 356581336, + "step": 3677 + }, + { + "epoch": 0.3595033242080563, + "loss": 0.06957665830850601, + "loss_ce": 0.005306636914610863, + "loss_iou": 0.333984375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 356581336, + "step": 3677 + }, + { + "epoch": 0.35960109503324206, + "grad_norm": 8.921078010148005, + "learning_rate": 5e-05, + "loss": 0.1107, + "num_input_tokens_seen": 356677904, + "step": 3678 + }, + { + "epoch": 0.35960109503324206, + "loss": 0.16222816705703735, + "loss_ce": 0.003750389441847801, + "loss_iou": 0.3125, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 356677904, + "step": 3678 + }, + { + "epoch": 0.35969886585842786, + "grad_norm": 4.422595864942582, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 356774540, + "step": 3679 + }, + { + "epoch": 0.35969886585842786, + "loss": 0.05436258018016815, + "loss_ce": 0.004939365200698376, + "loss_iou": 0.322265625, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 356774540, + "step": 3679 + }, + { + "epoch": 0.3597966366836136, + "grad_norm": 3.64158537924054, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 356872008, + "step": 3680 + }, + { + "epoch": 0.3597966366836136, + "loss": 0.08294984698295593, + "loss_ce": 0.005732748657464981, + "loss_iou": 0.271484375, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 356872008, + "step": 3680 + }, + { + "epoch": 0.35989440750879936, + "grad_norm": 10.06636926396364, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 356969424, + "step": 3681 + }, + { + "epoch": 0.35989440750879936, + "loss": 0.059602294117212296, + "loss_ce": 0.0017409626161679626, + "loss_iou": 0.328125, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 356969424, + "step": 3681 + }, + { + "epoch": 0.3599921783339851, + "grad_norm": 18.254544611030656, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 357065544, + "step": 3682 + }, + { + "epoch": 0.3599921783339851, + "loss": 0.09719526767730713, + "loss_ce": 0.00793898943811655, + "loss_iou": 0.287109375, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 357065544, + "step": 3682 + }, + { + "epoch": 0.3600899491591709, + "grad_norm": 3.5079700658730606, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 357162112, + "step": 3683 + }, + { + "epoch": 0.3600899491591709, + "loss": 0.0931965559720993, + "loss_ce": 0.006473234388977289, + "loss_iou": 0.2119140625, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 357162112, + "step": 3683 + }, + { + "epoch": 0.36018771998435667, + "grad_norm": 3.3922480424869503, + "learning_rate": 5e-05, + "loss": 0.1065, + "num_input_tokens_seen": 357259320, + "step": 3684 + }, + { + "epoch": 0.36018771998435667, + "loss": 0.07126418501138687, + "loss_ce": 0.002797998720780015, + "loss_iou": 0.349609375, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 357259320, + "step": 3684 + }, + { + "epoch": 0.3602854908095424, + "grad_norm": 7.5478658448246705, + "learning_rate": 5e-05, + "loss": 0.0982, + "num_input_tokens_seen": 357355856, + "step": 3685 + }, + { + "epoch": 0.3602854908095424, + "loss": 0.1072937548160553, + "loss_ce": 0.004632624797523022, + "loss_iou": 0.2431640625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 357355856, + "step": 3685 + }, + { + "epoch": 0.3603832616347282, + "grad_norm": 4.919233145966647, + "learning_rate": 5e-05, + "loss": 0.1232, + "num_input_tokens_seen": 357451892, + "step": 3686 + }, + { + "epoch": 0.3603832616347282, + "loss": 0.1135704293847084, + "loss_ce": 0.006697873119264841, + "loss_iou": 0.2890625, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 357451892, + "step": 3686 + }, + { + "epoch": 0.360481032459914, + "grad_norm": 5.681518175255505, + "learning_rate": 5e-05, + "loss": 0.0658, + "num_input_tokens_seen": 357548840, + "step": 3687 + }, + { + "epoch": 0.360481032459914, + "loss": 0.09694263339042664, + "loss_ce": 0.002765388460829854, + "loss_iou": 0.2578125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 357548840, + "step": 3687 + }, + { + "epoch": 0.3605788032850997, + "grad_norm": 7.319873035927219, + "learning_rate": 5e-05, + "loss": 0.0647, + "num_input_tokens_seen": 357645712, + "step": 3688 + }, + { + "epoch": 0.3605788032850997, + "loss": 0.07660052180290222, + "loss_ce": 0.005723441019654274, + "loss_iou": 0.25390625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 357645712, + "step": 3688 + }, + { + "epoch": 0.3606765741102855, + "grad_norm": 10.552596060038768, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 357742000, + "step": 3689 + }, + { + "epoch": 0.3606765741102855, + "loss": 0.09294263273477554, + "loss_ce": 0.002015509642660618, + "loss_iou": 0.328125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 357742000, + "step": 3689 + }, + { + "epoch": 0.3607743449354713, + "grad_norm": 18.717831147966304, + "learning_rate": 5e-05, + "loss": 0.0743, + "num_input_tokens_seen": 357837888, + "step": 3690 + }, + { + "epoch": 0.3607743449354713, + "loss": 0.08511850982904434, + "loss_ce": 0.007390236482024193, + "loss_iou": 0.265625, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 357837888, + "step": 3690 + }, + { + "epoch": 0.36087211576065703, + "grad_norm": 10.85969055077526, + "learning_rate": 5e-05, + "loss": 0.0959, + "num_input_tokens_seen": 357934468, + "step": 3691 + }, + { + "epoch": 0.36087211576065703, + "loss": 0.092132568359375, + "loss_ce": 0.004121785052120686, + "loss_iou": 0.220703125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 357934468, + "step": 3691 + }, + { + "epoch": 0.3609698865858428, + "grad_norm": 6.4769880873285, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 358031884, + "step": 3692 + }, + { + "epoch": 0.3609698865858428, + "loss": 0.07540841400623322, + "loss_ce": 0.0034327050670981407, + "loss_iou": 0.357421875, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 358031884, + "step": 3692 + }, + { + "epoch": 0.36106765741102853, + "grad_norm": 5.871117545028331, + "learning_rate": 5e-05, + "loss": 0.1253, + "num_input_tokens_seen": 358128812, + "step": 3693 + }, + { + "epoch": 0.36106765741102853, + "loss": 0.14415580034255981, + "loss_ce": 0.009817427024245262, + "loss_iou": 0.353515625, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 358128812, + "step": 3693 + }, + { + "epoch": 0.36116542823621434, + "grad_norm": 3.1919653472253846, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 358226264, + "step": 3694 + }, + { + "epoch": 0.36116542823621434, + "loss": 0.04786166548728943, + "loss_ce": 0.0030313441529870033, + "loss_iou": 0.345703125, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 358226264, + "step": 3694 + }, + { + "epoch": 0.3612631990614001, + "grad_norm": 1.9255647662676483, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 358322968, + "step": 3695 + }, + { + "epoch": 0.3612631990614001, + "loss": 0.06507913768291473, + "loss_ce": 0.0028232773765921593, + "loss_iou": 0.318359375, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 358322968, + "step": 3695 + }, + { + "epoch": 0.36136096988658584, + "grad_norm": 3.235647908169569, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 358419860, + "step": 3696 + }, + { + "epoch": 0.36136096988658584, + "loss": 0.06980133056640625, + "loss_ce": 0.0019912775605916977, + "loss_iou": 0.33203125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 358419860, + "step": 3696 + }, + { + "epoch": 0.3614587407117716, + "grad_norm": 20.723467362276953, + "learning_rate": 5e-05, + "loss": 0.1094, + "num_input_tokens_seen": 358516812, + "step": 3697 + }, + { + "epoch": 0.3614587407117716, + "loss": 0.09161442518234253, + "loss_ce": 0.0034186216071248055, + "loss_iou": 0.359375, + "loss_num": 0.017578125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 358516812, + "step": 3697 + }, + { + "epoch": 0.3615565115369574, + "grad_norm": 31.49328076993784, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 358613392, + "step": 3698 + }, + { + "epoch": 0.3615565115369574, + "loss": 0.06993323564529419, + "loss_ce": 0.006746589206159115, + "loss_iou": 0.3515625, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 358613392, + "step": 3698 + }, + { + "epoch": 0.36165428236214314, + "grad_norm": 20.069922818415186, + "learning_rate": 5e-05, + "loss": 0.1619, + "num_input_tokens_seen": 358710224, + "step": 3699 + }, + { + "epoch": 0.36165428236214314, + "loss": 0.16499879956245422, + "loss_ce": 0.007039825432002544, + "loss_iou": 0.482421875, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 358710224, + "step": 3699 + }, + { + "epoch": 0.3617520531873289, + "grad_norm": 23.20482864266082, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 358806928, + "step": 3700 + }, + { + "epoch": 0.3617520531873289, + "loss": 0.10555111616849899, + "loss_ce": 0.011221282184123993, + "loss_iou": 0.2578125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 358806928, + "step": 3700 + }, + { + "epoch": 0.36184982401251464, + "grad_norm": 4.92690498113628, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 358904308, + "step": 3701 + }, + { + "epoch": 0.36184982401251464, + "loss": 0.06824538856744766, + "loss_ce": 0.006668546237051487, + "loss_iou": 0.33984375, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 358904308, + "step": 3701 + }, + { + "epoch": 0.36194759483770045, + "grad_norm": 3.8055643564499317, + "learning_rate": 5e-05, + "loss": 0.125, + "num_input_tokens_seen": 359000776, + "step": 3702 + }, + { + "epoch": 0.36194759483770045, + "loss": 0.10673561692237854, + "loss_ce": 0.00576820457354188, + "loss_iou": 0.2890625, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 359000776, + "step": 3702 + }, + { + "epoch": 0.3620453656628862, + "grad_norm": 4.7831420432954666, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 359097452, + "step": 3703 + }, + { + "epoch": 0.3620453656628862, + "loss": 0.07310818135738373, + "loss_ce": 0.0046038441359996796, + "loss_iou": 0.22265625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 359097452, + "step": 3703 + }, + { + "epoch": 0.36214313648807195, + "grad_norm": 12.935430952046962, + "learning_rate": 5e-05, + "loss": 0.1069, + "num_input_tokens_seen": 359194464, + "step": 3704 + }, + { + "epoch": 0.36214313648807195, + "loss": 0.11624442040920258, + "loss_ce": 0.005068879574537277, + "loss_iou": 0.248046875, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 359194464, + "step": 3704 + }, + { + "epoch": 0.3622409073132577, + "grad_norm": 7.041611779354631, + "learning_rate": 5e-05, + "loss": 0.0572, + "num_input_tokens_seen": 359290692, + "step": 3705 + }, + { + "epoch": 0.3622409073132577, + "loss": 0.07222464680671692, + "loss_ce": 0.004242929629981518, + "loss_iou": 0.33984375, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 359290692, + "step": 3705 + }, + { + "epoch": 0.3623386781384435, + "grad_norm": 4.414924008739671, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 359389016, + "step": 3706 + }, + { + "epoch": 0.3623386781384435, + "loss": 0.0754464864730835, + "loss_ce": 0.005683306138962507, + "loss_iou": 0.396484375, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 359389016, + "step": 3706 + }, + { + "epoch": 0.36243644896362925, + "grad_norm": 10.920201528597495, + "learning_rate": 5e-05, + "loss": 0.0967, + "num_input_tokens_seen": 359486236, + "step": 3707 + }, + { + "epoch": 0.36243644896362925, + "loss": 0.12170445919036865, + "loss_ce": 0.007507690228521824, + "loss_iou": 0.390625, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 359486236, + "step": 3707 + }, + { + "epoch": 0.362534219788815, + "grad_norm": 9.00485774553294, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 359582780, + "step": 3708 + }, + { + "epoch": 0.362534219788815, + "loss": 0.062312960624694824, + "loss_ce": 0.004863621201366186, + "loss_iou": 0.294921875, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 359582780, + "step": 3708 + }, + { + "epoch": 0.3626319906140008, + "grad_norm": 9.207396668177934, + "learning_rate": 5e-05, + "loss": 0.1133, + "num_input_tokens_seen": 359680228, + "step": 3709 + }, + { + "epoch": 0.3626319906140008, + "loss": 0.1168375164270401, + "loss_ce": 0.0066690593957901, + "loss_iou": 0.35546875, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 359680228, + "step": 3709 + }, + { + "epoch": 0.36272976143918656, + "grad_norm": 6.287692479417052, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 359775604, + "step": 3710 + }, + { + "epoch": 0.36272976143918656, + "loss": 0.09295249730348587, + "loss_ce": 0.006252055987715721, + "loss_iou": 0.166015625, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 359775604, + "step": 3710 + }, + { + "epoch": 0.3628275322643723, + "grad_norm": 3.454919590378976, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 359873132, + "step": 3711 + }, + { + "epoch": 0.3628275322643723, + "loss": 0.07863177359104156, + "loss_ce": 0.004290949087589979, + "loss_iou": 0.310546875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 359873132, + "step": 3711 + }, + { + "epoch": 0.36292530308955806, + "grad_norm": 4.66051330770601, + "learning_rate": 5e-05, + "loss": 0.1079, + "num_input_tokens_seen": 359969676, + "step": 3712 + }, + { + "epoch": 0.36292530308955806, + "loss": 0.12968772649765015, + "loss_ce": 0.006244135554879904, + "loss_iou": 0.158203125, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 359969676, + "step": 3712 + }, + { + "epoch": 0.36302307391474387, + "grad_norm": 7.860878275449532, + "learning_rate": 5e-05, + "loss": 0.1166, + "num_input_tokens_seen": 360067556, + "step": 3713 + }, + { + "epoch": 0.36302307391474387, + "loss": 0.12185822427272797, + "loss_ce": 0.007173157297074795, + "loss_iou": 0.3671875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 360067556, + "step": 3713 + }, + { + "epoch": 0.3631208447399296, + "grad_norm": 12.149855807310184, + "learning_rate": 5e-05, + "loss": 0.1275, + "num_input_tokens_seen": 360163648, + "step": 3714 + }, + { + "epoch": 0.3631208447399296, + "loss": 0.09531697630882263, + "loss_ce": 0.0051070088520646095, + "loss_iou": 0.326171875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 360163648, + "step": 3714 + }, + { + "epoch": 0.36321861556511537, + "grad_norm": 17.257371076649935, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 360260636, + "step": 3715 + }, + { + "epoch": 0.36321861556511537, + "loss": 0.11020901799201965, + "loss_ce": 0.007410557009279728, + "loss_iou": 0.296875, + "loss_num": 0.0205078125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 360260636, + "step": 3715 + }, + { + "epoch": 0.3633163863903011, + "grad_norm": 12.447126705731602, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 360357664, + "step": 3716 + }, + { + "epoch": 0.3633163863903011, + "loss": 0.10113222897052765, + "loss_ce": 0.005425294861197472, + "loss_iou": 0.337890625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 360357664, + "step": 3716 + }, + { + "epoch": 0.3634141572154869, + "grad_norm": 3.3986115638784704, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 360454324, + "step": 3717 + }, + { + "epoch": 0.3634141572154869, + "loss": 0.06816376745700836, + "loss_ce": 0.005297561176121235, + "loss_iou": 0.326171875, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 360454324, + "step": 3717 + }, + { + "epoch": 0.36351192804067267, + "grad_norm": 4.1465377671980646, + "learning_rate": 5e-05, + "loss": 0.1248, + "num_input_tokens_seen": 360551612, + "step": 3718 + }, + { + "epoch": 0.36351192804067267, + "loss": 0.11538651585578918, + "loss_ce": 0.006499798968434334, + "loss_iou": 0.353515625, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 360551612, + "step": 3718 + }, + { + "epoch": 0.3636096988658584, + "grad_norm": 14.028277493255759, + "learning_rate": 5e-05, + "loss": 0.0973, + "num_input_tokens_seen": 360648436, + "step": 3719 + }, + { + "epoch": 0.3636096988658584, + "loss": 0.077998585999012, + "loss_ce": 0.0038408711552619934, + "loss_iou": 0.306640625, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 360648436, + "step": 3719 + }, + { + "epoch": 0.36370746969104417, + "grad_norm": 16.481744164362727, + "learning_rate": 5e-05, + "loss": 0.0918, + "num_input_tokens_seen": 360745732, + "step": 3720 + }, + { + "epoch": 0.36370746969104417, + "loss": 0.11939850449562073, + "loss_ce": 0.003523254068568349, + "loss_iou": 0.455078125, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 360745732, + "step": 3720 + }, + { + "epoch": 0.36380524051623, + "grad_norm": 10.86240808804502, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 360843184, + "step": 3721 + }, + { + "epoch": 0.36380524051623, + "loss": 0.07287681847810745, + "loss_ce": 0.005799181759357452, + "loss_iou": 0.419921875, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 360843184, + "step": 3721 + }, + { + "epoch": 0.3639030113414157, + "grad_norm": 3.551239200301727, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 360940060, + "step": 3722 + }, + { + "epoch": 0.3639030113414157, + "loss": 0.07607821375131607, + "loss_ce": 0.004422939848154783, + "loss_iou": 0.390625, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 360940060, + "step": 3722 + }, + { + "epoch": 0.3640007821666015, + "grad_norm": 3.434237305621263, + "learning_rate": 5e-05, + "loss": 0.1093, + "num_input_tokens_seen": 361036552, + "step": 3723 + }, + { + "epoch": 0.3640007821666015, + "loss": 0.07722422480583191, + "loss_ce": 0.00741526810452342, + "loss_iou": 0.255859375, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 361036552, + "step": 3723 + }, + { + "epoch": 0.3640985529917872, + "grad_norm": 6.436845314406339, + "learning_rate": 5e-05, + "loss": 0.0552, + "num_input_tokens_seen": 361133792, + "step": 3724 + }, + { + "epoch": 0.3640985529917872, + "loss": 0.05811290815472603, + "loss_ce": 0.005592154338955879, + "loss_iou": 0.291015625, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 361133792, + "step": 3724 + }, + { + "epoch": 0.36419632381697303, + "grad_norm": 9.316481835485389, + "learning_rate": 5e-05, + "loss": 0.1097, + "num_input_tokens_seen": 361230208, + "step": 3725 + }, + { + "epoch": 0.36419632381697303, + "loss": 0.12236005067825317, + "loss_ce": 0.00965863186866045, + "loss_iou": 0.236328125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 361230208, + "step": 3725 + }, + { + "epoch": 0.3642940946421588, + "grad_norm": 5.525345760290095, + "learning_rate": 5e-05, + "loss": 0.0508, + "num_input_tokens_seen": 361326776, + "step": 3726 + }, + { + "epoch": 0.3642940946421588, + "loss": 0.04984424263238907, + "loss_ce": 0.002969244960695505, + "loss_iou": 0.337890625, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 361326776, + "step": 3726 + }, + { + "epoch": 0.36439186546734453, + "grad_norm": 3.3910152705287775, + "learning_rate": 5e-05, + "loss": 0.0543, + "num_input_tokens_seen": 361422668, + "step": 3727 + }, + { + "epoch": 0.36439186546734453, + "loss": 0.03926313668489456, + "loss_ce": 0.0027030748315155506, + "loss_iou": 0.3046875, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 361422668, + "step": 3727 + }, + { + "epoch": 0.3644896362925303, + "grad_norm": 16.671286473183336, + "learning_rate": 5e-05, + "loss": 0.0701, + "num_input_tokens_seen": 361519260, + "step": 3728 + }, + { + "epoch": 0.3644896362925303, + "loss": 0.0486658550798893, + "loss_ce": 0.003879403229802847, + "loss_iou": 0.294921875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 361519260, + "step": 3728 + }, + { + "epoch": 0.3645874071177161, + "grad_norm": 36.16745996944589, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 361616304, + "step": 3729 + }, + { + "epoch": 0.3645874071177161, + "loss": 0.08341249078512192, + "loss_ce": 0.010719622485339642, + "loss_iou": 0.302734375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 361616304, + "step": 3729 + }, + { + "epoch": 0.36468517794290184, + "grad_norm": 10.031948674703534, + "learning_rate": 5e-05, + "loss": 0.0899, + "num_input_tokens_seen": 361713952, + "step": 3730 + }, + { + "epoch": 0.36468517794290184, + "loss": 0.10769357532262802, + "loss_ce": 0.008633514866232872, + "loss_iou": 0.40234375, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 361713952, + "step": 3730 + }, + { + "epoch": 0.3647829487680876, + "grad_norm": 4.528489325965079, + "learning_rate": 5e-05, + "loss": 0.077, + "num_input_tokens_seen": 361810628, + "step": 3731 + }, + { + "epoch": 0.3647829487680876, + "loss": 0.06261274218559265, + "loss_ce": 0.0071851941756904125, + "loss_iou": 0.34765625, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 361810628, + "step": 3731 + }, + { + "epoch": 0.3648807195932734, + "grad_norm": 11.959915099963204, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 361908052, + "step": 3732 + }, + { + "epoch": 0.3648807195932734, + "loss": 0.07050833106040955, + "loss_ce": 0.0037663904950022697, + "loss_iou": 0.376953125, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 361908052, + "step": 3732 + }, + { + "epoch": 0.36497849041845914, + "grad_norm": 11.661858519206598, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 362005500, + "step": 3733 + }, + { + "epoch": 0.36497849041845914, + "loss": 0.07233504951000214, + "loss_ce": 0.008187097497284412, + "loss_iou": 0.287109375, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 362005500, + "step": 3733 + }, + { + "epoch": 0.3650762612436449, + "grad_norm": 10.497678490986708, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 362101696, + "step": 3734 + }, + { + "epoch": 0.3650762612436449, + "loss": 0.08777423202991486, + "loss_ce": 0.006963680498301983, + "loss_iou": 0.1728515625, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 362101696, + "step": 3734 + }, + { + "epoch": 0.36517403206883065, + "grad_norm": 11.51887928646746, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 362198656, + "step": 3735 + }, + { + "epoch": 0.36517403206883065, + "loss": 0.11322231590747833, + "loss_ce": 0.005006984807550907, + "loss_iou": 0.298828125, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 362198656, + "step": 3735 + }, + { + "epoch": 0.36527180289401645, + "grad_norm": 5.2222443677878205, + "learning_rate": 5e-05, + "loss": 0.0974, + "num_input_tokens_seen": 362295780, + "step": 3736 + }, + { + "epoch": 0.36527180289401645, + "loss": 0.12010755389928818, + "loss_ce": 0.00816906988620758, + "loss_iou": 0.2197265625, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 362295780, + "step": 3736 + }, + { + "epoch": 0.3653695737192022, + "grad_norm": 6.952215854449989, + "learning_rate": 5e-05, + "loss": 0.102, + "num_input_tokens_seen": 362393032, + "step": 3737 + }, + { + "epoch": 0.3653695737192022, + "loss": 0.12462945282459259, + "loss_ce": 0.008204905316233635, + "loss_iou": 0.3359375, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 362393032, + "step": 3737 + }, + { + "epoch": 0.36546734454438795, + "grad_norm": 4.298479239477643, + "learning_rate": 5e-05, + "loss": 0.075, + "num_input_tokens_seen": 362489836, + "step": 3738 + }, + { + "epoch": 0.36546734454438795, + "loss": 0.043332476168870926, + "loss_ce": 0.004809755831956863, + "loss_iou": 0.302734375, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 362489836, + "step": 3738 + }, + { + "epoch": 0.3655651153695737, + "grad_norm": 3.378959142271693, + "learning_rate": 5e-05, + "loss": 0.0541, + "num_input_tokens_seen": 362587344, + "step": 3739 + }, + { + "epoch": 0.3655651153695737, + "loss": 0.05029475688934326, + "loss_ce": 0.005678053945302963, + "loss_iou": 0.2314453125, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 362587344, + "step": 3739 + }, + { + "epoch": 0.3656628861947595, + "grad_norm": 2.6356527548252293, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 362684304, + "step": 3740 + }, + { + "epoch": 0.3656628861947595, + "loss": 0.06829662621021271, + "loss_ce": 0.006635865196585655, + "loss_iou": 0.275390625, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 362684304, + "step": 3740 + }, + { + "epoch": 0.36576065701994526, + "grad_norm": 9.754103246785396, + "learning_rate": 5e-05, + "loss": 0.0965, + "num_input_tokens_seen": 362781592, + "step": 3741 + }, + { + "epoch": 0.36576065701994526, + "loss": 0.08942298591136932, + "loss_ce": 0.0068119000643491745, + "loss_iou": 0.4609375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 362781592, + "step": 3741 + }, + { + "epoch": 0.365858427845131, + "grad_norm": 19.319768193672772, + "learning_rate": 5e-05, + "loss": 0.1217, + "num_input_tokens_seen": 362877944, + "step": 3742 + }, + { + "epoch": 0.365858427845131, + "loss": 0.13990803062915802, + "loss_ce": 0.008682447485625744, + "loss_iou": 0.337890625, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 362877944, + "step": 3742 + }, + { + "epoch": 0.36595619867031676, + "grad_norm": 3.4823112383159893, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 362974468, + "step": 3743 + }, + { + "epoch": 0.36595619867031676, + "loss": 0.08444628119468689, + "loss_ce": 0.007479043677449226, + "loss_iou": 0.3125, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 362974468, + "step": 3743 + }, + { + "epoch": 0.36605396949550256, + "grad_norm": 15.299023757289628, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 363072368, + "step": 3744 + }, + { + "epoch": 0.36605396949550256, + "loss": 0.07173138111829758, + "loss_ce": 0.005996519699692726, + "loss_iou": 0.3984375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 363072368, + "step": 3744 + }, + { + "epoch": 0.3661517403206883, + "grad_norm": 5.572030791392617, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 363170616, + "step": 3745 + }, + { + "epoch": 0.3661517403206883, + "loss": 0.0483425036072731, + "loss_ce": 0.004000463522970676, + "loss_iou": 0.408203125, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 363170616, + "step": 3745 + }, + { + "epoch": 0.36624951114587406, + "grad_norm": 26.03545440789826, + "learning_rate": 5e-05, + "loss": 0.0889, + "num_input_tokens_seen": 363268552, + "step": 3746 + }, + { + "epoch": 0.36624951114587406, + "loss": 0.10090132057666779, + "loss_ce": 0.007868480868637562, + "loss_iou": 0.392578125, + "loss_num": 0.0185546875, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 363268552, + "step": 3746 + }, + { + "epoch": 0.3663472819710598, + "grad_norm": 5.791631876881827, + "learning_rate": 5e-05, + "loss": 0.1063, + "num_input_tokens_seen": 363365764, + "step": 3747 + }, + { + "epoch": 0.3663472819710598, + "loss": 0.11691135168075562, + "loss_ce": 0.003393583931028843, + "loss_iou": 0.390625, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 363365764, + "step": 3747 + }, + { + "epoch": 0.3664450527962456, + "grad_norm": 4.578068524901527, + "learning_rate": 5e-05, + "loss": 0.115, + "num_input_tokens_seen": 363462444, + "step": 3748 + }, + { + "epoch": 0.3664450527962456, + "loss": 0.07698284089565277, + "loss_ce": 0.003679615445435047, + "loss_iou": 0.330078125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 363462444, + "step": 3748 + }, + { + "epoch": 0.36654282362143137, + "grad_norm": 9.528622540723195, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 363559192, + "step": 3749 + }, + { + "epoch": 0.36654282362143137, + "loss": 0.0579766146838665, + "loss_ce": 0.005392921157181263, + "loss_iou": 0.2431640625, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 363559192, + "step": 3749 + }, + { + "epoch": 0.3666405944466171, + "grad_norm": 1.6002102528866193, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 363655376, + "step": 3750 + }, + { + "epoch": 0.3666405944466171, + "eval_seeclick_CIoU": 0.49133653938770294, + "eval_seeclick_GIoU": 0.4929518699645996, + "eval_seeclick_IoU": 0.5360150039196014, + "eval_seeclick_MAE_all": 0.08154381439089775, + "eval_seeclick_MAE_h": 0.04476317763328552, + "eval_seeclick_MAE_w": 0.11702768504619598, + "eval_seeclick_MAE_x": 0.1195368655025959, + "eval_seeclick_MAE_y": 0.044847521930933, + "eval_seeclick_NUM_probability": 0.9999814629554749, + "eval_seeclick_inside_bbox": 0.7073863744735718, + "eval_seeclick_loss": 0.2933661639690399, + "eval_seeclick_loss_ce": 0.010489025618880987, + "eval_seeclick_loss_iou": 0.4283447265625, + "eval_seeclick_loss_num": 0.05808258056640625, + "eval_seeclick_loss_xval": 0.2904052734375, + "eval_seeclick_runtime": 72.9173, + "eval_seeclick_samples_per_second": 0.59, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 363655376, + "step": 3750 + }, + { + "epoch": 0.3666405944466171, + "eval_icons_CIoU": 0.6300404369831085, + "eval_icons_GIoU": 0.6175053119659424, + "eval_icons_IoU": 0.666627824306488, + "eval_icons_MAE_all": 0.06547792255878448, + "eval_icons_MAE_h": 0.08619673550128937, + "eval_icons_MAE_w": 0.04484017193317413, + "eval_icons_MAE_x": 0.04485031217336655, + "eval_icons_MAE_y": 0.08602447807788849, + "eval_icons_NUM_probability": 0.9999747276306152, + "eval_icons_inside_bbox": 0.7795138955116272, + "eval_icons_loss": 0.18838661909103394, + "eval_icons_loss_ce": 7.90132821748557e-06, + "eval_icons_loss_iou": 0.3662109375, + "eval_icons_loss_num": 0.041461944580078125, + "eval_icons_loss_xval": 0.2073974609375, + "eval_icons_runtime": 86.6268, + "eval_icons_samples_per_second": 0.577, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 363655376, + "step": 3750 + }, + { + "epoch": 0.3666405944466171, + "eval_screenspot_CIoU": 0.304678996404012, + "eval_screenspot_GIoU": 0.2915564278761546, + "eval_screenspot_IoU": 0.3920852839946747, + "eval_screenspot_MAE_all": 0.16089056432247162, + "eval_screenspot_MAE_h": 0.12372669577598572, + "eval_screenspot_MAE_w": 0.2146764099597931, + "eval_screenspot_MAE_x": 0.1759794702132543, + "eval_screenspot_MAE_y": 0.12917965898911157, + "eval_screenspot_NUM_probability": 0.9998706777890524, + "eval_screenspot_inside_bbox": 0.6537500023841858, + "eval_screenspot_loss": 0.5792322754859924, + "eval_screenspot_loss_ce": 0.02633878029882908, + "eval_screenspot_loss_iou": 0.3446858723958333, + "eval_screenspot_loss_num": 0.11219278971354167, + "eval_screenspot_loss_xval": 0.5608927408854166, + "eval_screenspot_runtime": 157.5887, + "eval_screenspot_samples_per_second": 0.565, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 363655376, + "step": 3750 + }, + { + "epoch": 0.3666405944466171, + "eval_compot_CIoU": 0.45631466805934906, + "eval_compot_GIoU": 0.43800507485866547, + "eval_compot_IoU": 0.5261825621128082, + "eval_compot_MAE_all": 0.0957164354622364, + "eval_compot_MAE_h": 0.06870023906230927, + "eval_compot_MAE_w": 0.12157975137233734, + "eval_compot_MAE_x": 0.12188627198338509, + "eval_compot_MAE_y": 0.07069947570562363, + "eval_compot_NUM_probability": 0.9999642372131348, + "eval_compot_inside_bbox": 0.6493055522441864, + "eval_compot_loss": 0.2962746322154999, + "eval_compot_loss_ce": 0.018882127478718758, + "eval_compot_loss_iou": 0.462646484375, + "eval_compot_loss_num": 0.0498199462890625, + "eval_compot_loss_xval": 0.24896240234375, + "eval_compot_runtime": 89.8033, + "eval_compot_samples_per_second": 0.557, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 363655376, + "step": 3750 + }, + { + "epoch": 0.3666405944466171, + "loss": 0.25848838686943054, + "loss_ce": 0.01758262701332569, + "loss_iou": 0.4609375, + "loss_num": 0.04833984375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 363655376, + "step": 3750 + }, + { + "epoch": 0.36673836527180287, + "grad_norm": 5.931747539788863, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 363751912, + "step": 3751 + }, + { + "epoch": 0.36673836527180287, + "loss": 0.12459263205528259, + "loss_ce": 0.006626930087804794, + "loss_iou": 0.28125, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 363751912, + "step": 3751 + }, + { + "epoch": 0.3668361360969887, + "grad_norm": 2.582634386995352, + "learning_rate": 5e-05, + "loss": 0.0501, + "num_input_tokens_seen": 363848924, + "step": 3752 + }, + { + "epoch": 0.3668361360969887, + "loss": 0.037460483610630035, + "loss_ce": 0.0029908795841038227, + "loss_iou": 0.279296875, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 363848924, + "step": 3752 + }, + { + "epoch": 0.3669339069221744, + "grad_norm": 7.440697725401821, + "learning_rate": 5e-05, + "loss": 0.071, + "num_input_tokens_seen": 363945180, + "step": 3753 + }, + { + "epoch": 0.3669339069221744, + "loss": 0.08021224290132523, + "loss_ce": 0.008236536756157875, + "loss_iou": 0.251953125, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 363945180, + "step": 3753 + }, + { + "epoch": 0.3670316777473602, + "grad_norm": 7.901321152762984, + "learning_rate": 5e-05, + "loss": 0.0644, + "num_input_tokens_seen": 364042268, + "step": 3754 + }, + { + "epoch": 0.3670316777473602, + "loss": 0.061611440032720566, + "loss_ce": 0.004223134368658066, + "loss_iou": 0.33984375, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 364042268, + "step": 3754 + }, + { + "epoch": 0.367129448572546, + "grad_norm": 30.756803061860083, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 364139628, + "step": 3755 + }, + { + "epoch": 0.367129448572546, + "loss": 0.08046334236860275, + "loss_ce": 0.005023889243602753, + "loss_iou": 0.4296875, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 364139628, + "step": 3755 + }, + { + "epoch": 0.36722721939773173, + "grad_norm": 37.65616848308062, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 364236100, + "step": 3756 + }, + { + "epoch": 0.36722721939773173, + "loss": 0.11857988685369492, + "loss_ce": 0.006305708549916744, + "loss_iou": 0.337890625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 364236100, + "step": 3756 + }, + { + "epoch": 0.3673249902229175, + "grad_norm": 10.111800865042978, + "learning_rate": 5e-05, + "loss": 0.0929, + "num_input_tokens_seen": 364333636, + "step": 3757 + }, + { + "epoch": 0.3673249902229175, + "loss": 0.07712939381599426, + "loss_ce": 0.0036430652253329754, + "loss_iou": 0.3828125, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 364333636, + "step": 3757 + }, + { + "epoch": 0.36742276104810323, + "grad_norm": 4.935413275835035, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 364430476, + "step": 3758 + }, + { + "epoch": 0.36742276104810323, + "loss": 0.10031279176473618, + "loss_ce": 0.006623832043260336, + "loss_iou": 0.28515625, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 364430476, + "step": 3758 + }, + { + "epoch": 0.36752053187328904, + "grad_norm": 12.324201860716737, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 364527672, + "step": 3759 + }, + { + "epoch": 0.36752053187328904, + "loss": 0.08048311620950699, + "loss_ce": 0.005875267554074526, + "loss_iou": 0.232421875, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 364527672, + "step": 3759 + }, + { + "epoch": 0.3676183026984748, + "grad_norm": 2.6845648328009606, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 364624836, + "step": 3760 + }, + { + "epoch": 0.3676183026984748, + "loss": 0.046138521283864975, + "loss_ce": 0.0030324412509799004, + "loss_iou": 0.2001953125, + "loss_num": 0.00860595703125, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 364624836, + "step": 3760 + }, + { + "epoch": 0.36771607352366054, + "grad_norm": 9.697114732869624, + "learning_rate": 5e-05, + "loss": 0.1335, + "num_input_tokens_seen": 364720936, + "step": 3761 + }, + { + "epoch": 0.36771607352366054, + "loss": 0.14859497547149658, + "loss_ce": 0.004346016328781843, + "loss_iou": 0.283203125, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 364720936, + "step": 3761 + }, + { + "epoch": 0.3678138443488463, + "grad_norm": 3.115279461617078, + "learning_rate": 5e-05, + "loss": 0.0913, + "num_input_tokens_seen": 364818408, + "step": 3762 + }, + { + "epoch": 0.3678138443488463, + "loss": 0.09815789759159088, + "loss_ce": 0.006849301978945732, + "loss_iou": 0.40625, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 364818408, + "step": 3762 + }, + { + "epoch": 0.3679116151740321, + "grad_norm": 3.330341967893589, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 364915076, + "step": 3763 + }, + { + "epoch": 0.3679116151740321, + "loss": 0.08142250776290894, + "loss_ce": 0.00509804580360651, + "loss_iou": 0.30078125, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 364915076, + "step": 3763 + }, + { + "epoch": 0.36800938599921784, + "grad_norm": 5.69890768615716, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 365011360, + "step": 3764 + }, + { + "epoch": 0.36800938599921784, + "loss": 0.05814823508262634, + "loss_ce": 0.0032165944576263428, + "loss_iou": 0.28125, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 365011360, + "step": 3764 + }, + { + "epoch": 0.3681071568244036, + "grad_norm": 10.203126707839496, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 365107836, + "step": 3765 + }, + { + "epoch": 0.3681071568244036, + "loss": 0.07226558029651642, + "loss_ce": 0.0035857665352523327, + "loss_iou": 0.35546875, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 365107836, + "step": 3765 + }, + { + "epoch": 0.36820492764958934, + "grad_norm": 23.834562581169806, + "learning_rate": 5e-05, + "loss": 0.1195, + "num_input_tokens_seen": 365205152, + "step": 3766 + }, + { + "epoch": 0.36820492764958934, + "loss": 0.10661637783050537, + "loss_ce": 0.002795569831505418, + "loss_iou": 0.349609375, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 365205152, + "step": 3766 + }, + { + "epoch": 0.36830269847477515, + "grad_norm": 7.299465042629759, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 365301556, + "step": 3767 + }, + { + "epoch": 0.36830269847477515, + "loss": 0.08859182894229889, + "loss_ce": 0.010543117299675941, + "loss_iou": 0.2236328125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 365301556, + "step": 3767 + }, + { + "epoch": 0.3684004692999609, + "grad_norm": 7.058282158547796, + "learning_rate": 5e-05, + "loss": 0.0787, + "num_input_tokens_seen": 365398408, + "step": 3768 + }, + { + "epoch": 0.3684004692999609, + "loss": 0.07876697927713394, + "loss_ce": 0.010117687284946442, + "loss_iou": 0.3359375, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 365398408, + "step": 3768 + }, + { + "epoch": 0.36849824012514665, + "grad_norm": 5.85134528534828, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 365495308, + "step": 3769 + }, + { + "epoch": 0.36849824012514665, + "loss": 0.10372354090213776, + "loss_ce": 0.004587194416671991, + "loss_iou": 0.2734375, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 365495308, + "step": 3769 + }, + { + "epoch": 0.3685960109503324, + "grad_norm": 17.265059140690468, + "learning_rate": 5e-05, + "loss": 0.0834, + "num_input_tokens_seen": 365591716, + "step": 3770 + }, + { + "epoch": 0.3685960109503324, + "loss": 0.09591029584407806, + "loss_ce": 0.007500871084630489, + "loss_iou": 0.345703125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 365591716, + "step": 3770 + }, + { + "epoch": 0.3686937817755182, + "grad_norm": 18.096077709382094, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 365688272, + "step": 3771 + }, + { + "epoch": 0.3686937817755182, + "loss": 0.06609825789928436, + "loss_ce": 0.0032625708263367414, + "loss_iou": 0.345703125, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 365688272, + "step": 3771 + }, + { + "epoch": 0.36879155260070395, + "grad_norm": 4.5107823755532666, + "learning_rate": 5e-05, + "loss": 0.1174, + "num_input_tokens_seen": 365785132, + "step": 3772 + }, + { + "epoch": 0.36879155260070395, + "loss": 0.11875664442777634, + "loss_ce": 0.012296071276068687, + "loss_iou": 0.287109375, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 365785132, + "step": 3772 + }, + { + "epoch": 0.3688893234258897, + "grad_norm": 5.604981451500941, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 365882724, + "step": 3773 + }, + { + "epoch": 0.3688893234258897, + "loss": 0.0784069150686264, + "loss_ce": 0.011420827358961105, + "loss_iou": 0.34375, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 365882724, + "step": 3773 + }, + { + "epoch": 0.36898709425107545, + "grad_norm": 5.437245486398473, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 365978420, + "step": 3774 + }, + { + "epoch": 0.36898709425107545, + "loss": 0.064273402094841, + "loss_ce": 0.0027804826386272907, + "loss_iou": 0.3359375, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 365978420, + "step": 3774 + }, + { + "epoch": 0.36908486507626126, + "grad_norm": 4.579719353558241, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 366075168, + "step": 3775 + }, + { + "epoch": 0.36908486507626126, + "loss": 0.06215249001979828, + "loss_ce": 0.002315147314220667, + "loss_iou": 0.33203125, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 366075168, + "step": 3775 + }, + { + "epoch": 0.369182635901447, + "grad_norm": 15.991388937721364, + "learning_rate": 5e-05, + "loss": 0.0992, + "num_input_tokens_seen": 366171612, + "step": 3776 + }, + { + "epoch": 0.369182635901447, + "loss": 0.06728759407997131, + "loss_ce": 0.005195764359086752, + "loss_iou": 0.359375, + "loss_num": 0.012451171875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 366171612, + "step": 3776 + }, + { + "epoch": 0.36928040672663276, + "grad_norm": 33.401733640741256, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 366268648, + "step": 3777 + }, + { + "epoch": 0.36928040672663276, + "loss": 0.09895855188369751, + "loss_ce": 0.007146425079554319, + "loss_iou": 0.345703125, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 366268648, + "step": 3777 + }, + { + "epoch": 0.36937817755181857, + "grad_norm": 14.13086087876926, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 366365488, + "step": 3778 + }, + { + "epoch": 0.36937817755181857, + "loss": 0.0640222579240799, + "loss_ce": 0.003902625758200884, + "loss_iou": 0.3125, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 366365488, + "step": 3778 + }, + { + "epoch": 0.3694759483770043, + "grad_norm": 3.9078347768644943, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 366461364, + "step": 3779 + }, + { + "epoch": 0.3694759483770043, + "loss": 0.08722759038209915, + "loss_ce": 0.010750536806881428, + "loss_iou": 0.240234375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 366461364, + "step": 3779 + }, + { + "epoch": 0.36957371920219007, + "grad_norm": 13.20110135179272, + "learning_rate": 5e-05, + "loss": 0.0691, + "num_input_tokens_seen": 366558296, + "step": 3780 + }, + { + "epoch": 0.36957371920219007, + "loss": 0.06739156693220139, + "loss_ce": 0.0016948493430390954, + "loss_iou": 0.34375, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 366558296, + "step": 3780 + }, + { + "epoch": 0.3696714900273758, + "grad_norm": 15.090084805609546, + "learning_rate": 5e-05, + "loss": 0.0846, + "num_input_tokens_seen": 366654416, + "step": 3781 + }, + { + "epoch": 0.3696714900273758, + "loss": 0.08331866562366486, + "loss_ce": 0.006017640233039856, + "loss_iou": 0.28515625, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 366654416, + "step": 3781 + }, + { + "epoch": 0.3697692608525616, + "grad_norm": 79.9760890065682, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 366751452, + "step": 3782 + }, + { + "epoch": 0.3697692608525616, + "loss": 0.08412602543830872, + "loss_ce": 0.005451706238090992, + "loss_iou": 0.30078125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 366751452, + "step": 3782 + }, + { + "epoch": 0.36986703167774737, + "grad_norm": 2.9022460518643514, + "learning_rate": 5e-05, + "loss": 0.0463, + "num_input_tokens_seen": 366848224, + "step": 3783 + }, + { + "epoch": 0.36986703167774737, + "loss": 0.04374673217535019, + "loss_ce": 0.0051572564989328384, + "loss_iou": 0.337890625, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 366848224, + "step": 3783 + }, + { + "epoch": 0.3699648025029331, + "grad_norm": 3.9939796199304602, + "learning_rate": 5e-05, + "loss": 0.1056, + "num_input_tokens_seen": 366945100, + "step": 3784 + }, + { + "epoch": 0.3699648025029331, + "loss": 0.11790002882480621, + "loss_ce": 0.009989875368773937, + "loss_iou": 0.31640625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 366945100, + "step": 3784 + }, + { + "epoch": 0.37006257332811887, + "grad_norm": 10.934138258276509, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 367041460, + "step": 3785 + }, + { + "epoch": 0.37006257332811887, + "loss": 0.07250408828258514, + "loss_ce": 0.005258604418486357, + "loss_iou": 0.21875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 367041460, + "step": 3785 + }, + { + "epoch": 0.3701603441533047, + "grad_norm": 6.476346287812824, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 367137084, + "step": 3786 + }, + { + "epoch": 0.3701603441533047, + "loss": 0.03350585699081421, + "loss_ce": 0.003224790096282959, + "loss_iou": 0.15625, + "loss_num": 0.006072998046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 367137084, + "step": 3786 + }, + { + "epoch": 0.3702581149784904, + "grad_norm": 48.45908950547822, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 367233776, + "step": 3787 + }, + { + "epoch": 0.3702581149784904, + "loss": 0.11180202662944794, + "loss_ce": 0.008088032715022564, + "loss_iou": 0.1962890625, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 367233776, + "step": 3787 + }, + { + "epoch": 0.3703558858036762, + "grad_norm": 5.646187858207589, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 367331728, + "step": 3788 + }, + { + "epoch": 0.3703558858036762, + "loss": 0.06564750522375107, + "loss_ce": 0.004978561773896217, + "loss_iou": 0.25390625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 367331728, + "step": 3788 + }, + { + "epoch": 0.3704536566288619, + "grad_norm": 5.211718209659897, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 367430280, + "step": 3789 + }, + { + "epoch": 0.3704536566288619, + "loss": 0.0641762763261795, + "loss_ce": 0.005262091290205717, + "loss_iou": 0.375, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 367430280, + "step": 3789 + }, + { + "epoch": 0.37055142745404773, + "grad_norm": 2.852027514415109, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 367527060, + "step": 3790 + }, + { + "epoch": 0.37055142745404773, + "loss": 0.092508964240551, + "loss_ce": 0.004954037722200155, + "loss_iou": 0.30859375, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 367527060, + "step": 3790 + }, + { + "epoch": 0.3706491982792335, + "grad_norm": 10.090975103713959, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 367624456, + "step": 3791 + }, + { + "epoch": 0.3706491982792335, + "loss": 0.0674804225564003, + "loss_ce": 0.005148266442120075, + "loss_iou": 0.390625, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 367624456, + "step": 3791 + }, + { + "epoch": 0.37074696910441923, + "grad_norm": 4.020088563079946, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 367721280, + "step": 3792 + }, + { + "epoch": 0.37074696910441923, + "loss": 0.08075754344463348, + "loss_ce": 0.00542490603402257, + "loss_iou": 0.388671875, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 367721280, + "step": 3792 + }, + { + "epoch": 0.370844739929605, + "grad_norm": 12.60031029144515, + "learning_rate": 5e-05, + "loss": 0.1074, + "num_input_tokens_seen": 367818044, + "step": 3793 + }, + { + "epoch": 0.370844739929605, + "loss": 0.09542054682970047, + "loss_ce": 0.006156632211059332, + "loss_iou": 0.33984375, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 367818044, + "step": 3793 + }, + { + "epoch": 0.3709425107547908, + "grad_norm": 6.8807492618451205, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 367914984, + "step": 3794 + }, + { + "epoch": 0.3709425107547908, + "loss": 0.12166531383991241, + "loss_ce": 0.009146995842456818, + "loss_iou": 0.35546875, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 367914984, + "step": 3794 + }, + { + "epoch": 0.37104028157997654, + "grad_norm": 10.439559757143236, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 368012024, + "step": 3795 + }, + { + "epoch": 0.37104028157997654, + "loss": 0.11590050160884857, + "loss_ce": 0.0047097052447497845, + "loss_iou": 0.31640625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 368012024, + "step": 3795 + }, + { + "epoch": 0.3711380524051623, + "grad_norm": 19.099799520453953, + "learning_rate": 5e-05, + "loss": 0.0946, + "num_input_tokens_seen": 368109396, + "step": 3796 + }, + { + "epoch": 0.3711380524051623, + "loss": 0.0732760801911354, + "loss_ce": 0.005160846747457981, + "loss_iou": 0.388671875, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 368109396, + "step": 3796 + }, + { + "epoch": 0.37123582323034804, + "grad_norm": 10.55218499552128, + "learning_rate": 5e-05, + "loss": 0.101, + "num_input_tokens_seen": 368207128, + "step": 3797 + }, + { + "epoch": 0.37123582323034804, + "loss": 0.1189444437623024, + "loss_ce": 0.003282815683633089, + "loss_iou": 0.41015625, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 368207128, + "step": 3797 + }, + { + "epoch": 0.37133359405553384, + "grad_norm": 11.995288571301499, + "learning_rate": 5e-05, + "loss": 0.0745, + "num_input_tokens_seen": 368304436, + "step": 3798 + }, + { + "epoch": 0.37133359405553384, + "loss": 0.07972018420696259, + "loss_ce": 0.001961394213140011, + "loss_iou": 0.37109375, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 368304436, + "step": 3798 + }, + { + "epoch": 0.3714313648807196, + "grad_norm": 4.524992703307872, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 368402428, + "step": 3799 + }, + { + "epoch": 0.3714313648807196, + "loss": 0.05245477333664894, + "loss_ce": 0.0021770631428807974, + "loss_iou": 0.431640625, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 368402428, + "step": 3799 + }, + { + "epoch": 0.37152913570590534, + "grad_norm": 7.302261822999985, + "learning_rate": 5e-05, + "loss": 0.11, + "num_input_tokens_seen": 368498976, + "step": 3800 + }, + { + "epoch": 0.37152913570590534, + "loss": 0.14242054522037506, + "loss_ce": 0.0065868026576936245, + "loss_iou": 0.373046875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 368498976, + "step": 3800 + }, + { + "epoch": 0.37162690653109115, + "grad_norm": 5.605216065794156, + "learning_rate": 5e-05, + "loss": 0.0917, + "num_input_tokens_seen": 368595360, + "step": 3801 + }, + { + "epoch": 0.37162690653109115, + "loss": 0.11425280570983887, + "loss_ce": 0.005991694517433643, + "loss_iou": 0.29296875, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 368595360, + "step": 3801 + }, + { + "epoch": 0.3717246773562769, + "grad_norm": 5.904735154359812, + "learning_rate": 5e-05, + "loss": 0.1176, + "num_input_tokens_seen": 368692812, + "step": 3802 + }, + { + "epoch": 0.3717246773562769, + "loss": 0.14142435789108276, + "loss_ce": 0.005407501012086868, + "loss_iou": 0.353515625, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 368692812, + "step": 3802 + }, + { + "epoch": 0.37182244818146265, + "grad_norm": 3.839888686796334, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 368790392, + "step": 3803 + }, + { + "epoch": 0.37182244818146265, + "loss": 0.06225604563951492, + "loss_ce": 0.0023653015960007906, + "loss_iou": 0.294921875, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 368790392, + "step": 3803 + }, + { + "epoch": 0.3719202190066484, + "grad_norm": 3.1943896895118282, + "learning_rate": 5e-05, + "loss": 0.0905, + "num_input_tokens_seen": 368886348, + "step": 3804 + }, + { + "epoch": 0.3719202190066484, + "loss": 0.0960145965218544, + "loss_ce": 0.006300546228885651, + "loss_iou": 0.271484375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 368886348, + "step": 3804 + }, + { + "epoch": 0.3720179898318342, + "grad_norm": 5.006996074902767, + "learning_rate": 5e-05, + "loss": 0.0769, + "num_input_tokens_seen": 368983704, + "step": 3805 + }, + { + "epoch": 0.3720179898318342, + "loss": 0.06073782220482826, + "loss_ce": 0.006569121032953262, + "loss_iou": 0.3671875, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 368983704, + "step": 3805 + }, + { + "epoch": 0.37211576065701996, + "grad_norm": 5.282086547496906, + "learning_rate": 5e-05, + "loss": 0.0945, + "num_input_tokens_seen": 369081036, + "step": 3806 + }, + { + "epoch": 0.37211576065701996, + "loss": 0.0989392101764679, + "loss_ce": 0.007592464331537485, + "loss_iou": 0.318359375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 369081036, + "step": 3806 + }, + { + "epoch": 0.3722135314822057, + "grad_norm": 6.05343250085485, + "learning_rate": 5e-05, + "loss": 0.0541, + "num_input_tokens_seen": 369177816, + "step": 3807 + }, + { + "epoch": 0.3722135314822057, + "loss": 0.048658743500709534, + "loss_ce": 0.002684012521058321, + "loss_iou": 0.322265625, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 369177816, + "step": 3807 + }, + { + "epoch": 0.37231130230739146, + "grad_norm": 7.388904528127343, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 369275188, + "step": 3808 + }, + { + "epoch": 0.37231130230739146, + "loss": 0.09219174087047577, + "loss_ce": 0.004911468364298344, + "loss_iou": 0.318359375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 369275188, + "step": 3808 + }, + { + "epoch": 0.37240907313257726, + "grad_norm": 14.362226668129113, + "learning_rate": 5e-05, + "loss": 0.0821, + "num_input_tokens_seen": 369371616, + "step": 3809 + }, + { + "epoch": 0.37240907313257726, + "loss": 0.0955575704574585, + "loss_ce": 0.0045770322903990746, + "loss_iou": 0.193359375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 369371616, + "step": 3809 + }, + { + "epoch": 0.372506843957763, + "grad_norm": 19.451253079511705, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 369469304, + "step": 3810 + }, + { + "epoch": 0.372506843957763, + "loss": 0.09475409984588623, + "loss_ce": 0.0029572290368378162, + "loss_iou": 0.2578125, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 369469304, + "step": 3810 + }, + { + "epoch": 0.37260461478294876, + "grad_norm": 15.325402626808362, + "learning_rate": 5e-05, + "loss": 0.0971, + "num_input_tokens_seen": 369567544, + "step": 3811 + }, + { + "epoch": 0.37260461478294876, + "loss": 0.0714682787656784, + "loss_ce": 0.0036582285538315773, + "loss_iou": 0.298828125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 369567544, + "step": 3811 + }, + { + "epoch": 0.3727023856081345, + "grad_norm": 23.34681999805899, + "learning_rate": 5e-05, + "loss": 0.0916, + "num_input_tokens_seen": 369665116, + "step": 3812 + }, + { + "epoch": 0.3727023856081345, + "loss": 0.09365673363208771, + "loss_ce": 0.006010245531797409, + "loss_iou": 0.427734375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 369665116, + "step": 3812 + }, + { + "epoch": 0.3728001564333203, + "grad_norm": 21.601622765273287, + "learning_rate": 5e-05, + "loss": 0.1278, + "num_input_tokens_seen": 369762652, + "step": 3813 + }, + { + "epoch": 0.3728001564333203, + "loss": 0.13853606581687927, + "loss_ce": 0.007272331975400448, + "loss_iou": 0.333984375, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 369762652, + "step": 3813 + }, + { + "epoch": 0.37289792725850607, + "grad_norm": 29.46029292330467, + "learning_rate": 5e-05, + "loss": 0.1239, + "num_input_tokens_seen": 369859176, + "step": 3814 + }, + { + "epoch": 0.37289792725850607, + "loss": 0.0956520214676857, + "loss_ce": 0.008692181669175625, + "loss_iou": 0.3359375, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 369859176, + "step": 3814 + }, + { + "epoch": 0.3729956980836918, + "grad_norm": 17.109162879716187, + "learning_rate": 5e-05, + "loss": 0.1152, + "num_input_tokens_seen": 369957116, + "step": 3815 + }, + { + "epoch": 0.3729956980836918, + "loss": 0.0965261235833168, + "loss_ce": 0.008330320939421654, + "loss_iou": 0.30859375, + "loss_num": 0.017578125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 369957116, + "step": 3815 + }, + { + "epoch": 0.37309346890887757, + "grad_norm": 8.145537436556051, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 370053796, + "step": 3816 + }, + { + "epoch": 0.37309346890887757, + "loss": 0.08069127798080444, + "loss_ce": 0.005038205534219742, + "loss_iou": 0.328125, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 370053796, + "step": 3816 + }, + { + "epoch": 0.3731912397340634, + "grad_norm": 14.014207635128363, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 370150116, + "step": 3817 + }, + { + "epoch": 0.3731912397340634, + "loss": 0.06571793556213379, + "loss_ce": 0.005533455405384302, + "loss_iou": 0.2578125, + "loss_num": 0.01202392578125, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 370150116, + "step": 3817 + }, + { + "epoch": 0.3732890105592491, + "grad_norm": 18.378978245187028, + "learning_rate": 5e-05, + "loss": 0.0513, + "num_input_tokens_seen": 370246188, + "step": 3818 + }, + { + "epoch": 0.3732890105592491, + "loss": 0.04444463551044464, + "loss_ce": 0.003711300203576684, + "loss_iou": 0.1513671875, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 370246188, + "step": 3818 + }, + { + "epoch": 0.3733867813844349, + "grad_norm": 14.364365525987976, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 370342800, + "step": 3819 + }, + { + "epoch": 0.3733867813844349, + "loss": 0.06398716568946838, + "loss_ce": 0.007827186957001686, + "loss_iou": 0.240234375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 370342800, + "step": 3819 + }, + { + "epoch": 0.3734845522096206, + "grad_norm": 6.071630736992916, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 370439188, + "step": 3820 + }, + { + "epoch": 0.3734845522096206, + "loss": 0.06812863051891327, + "loss_ce": 0.0034084785729646683, + "loss_iou": 0.296875, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 370439188, + "step": 3820 + }, + { + "epoch": 0.37358232303480643, + "grad_norm": 3.759390612358576, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 370537112, + "step": 3821 + }, + { + "epoch": 0.37358232303480643, + "loss": 0.07512491941452026, + "loss_ce": 0.0016691035125404596, + "loss_iou": 0.3984375, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 370537112, + "step": 3821 + }, + { + "epoch": 0.3736800938599922, + "grad_norm": 7.481280435968995, + "learning_rate": 5e-05, + "loss": 0.1292, + "num_input_tokens_seen": 370634676, + "step": 3822 + }, + { + "epoch": 0.3736800938599922, + "loss": 0.13063067197799683, + "loss_ce": 0.009811586700379848, + "loss_iou": 0.357421875, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 370634676, + "step": 3822 + }, + { + "epoch": 0.37377786468517793, + "grad_norm": 3.6710797890686964, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 370731772, + "step": 3823 + }, + { + "epoch": 0.37377786468517793, + "loss": 0.08866245299577713, + "loss_ce": 0.007470440119504929, + "loss_iou": 0.302734375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 370731772, + "step": 3823 + }, + { + "epoch": 0.37387563551036374, + "grad_norm": 10.469530674571493, + "learning_rate": 5e-05, + "loss": 0.1121, + "num_input_tokens_seen": 370828588, + "step": 3824 + }, + { + "epoch": 0.37387563551036374, + "loss": 0.0677446573972702, + "loss_ce": 0.0014452136820182204, + "loss_iou": 0.314453125, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 370828588, + "step": 3824 + }, + { + "epoch": 0.3739734063355495, + "grad_norm": 6.602086472260308, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 370924932, + "step": 3825 + }, + { + "epoch": 0.3739734063355495, + "loss": 0.0950872004032135, + "loss_ce": 0.002985146827995777, + "loss_iou": 0.375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 370924932, + "step": 3825 + }, + { + "epoch": 0.37407117716073524, + "grad_norm": 7.759177563780671, + "learning_rate": 5e-05, + "loss": 0.1063, + "num_input_tokens_seen": 371021744, + "step": 3826 + }, + { + "epoch": 0.37407117716073524, + "loss": 0.10290133953094482, + "loss_ce": 0.007274505216628313, + "loss_iou": 0.373046875, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 371021744, + "step": 3826 + }, + { + "epoch": 0.374168947985921, + "grad_norm": 11.737124197348964, + "learning_rate": 5e-05, + "loss": 0.0954, + "num_input_tokens_seen": 371119060, + "step": 3827 + }, + { + "epoch": 0.374168947985921, + "loss": 0.10293009132146835, + "loss_ce": 0.004388830624520779, + "loss_iou": 0.39453125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 371119060, + "step": 3827 + }, + { + "epoch": 0.3742667188111068, + "grad_norm": 15.359266110644667, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 371215684, + "step": 3828 + }, + { + "epoch": 0.3742667188111068, + "loss": 0.08952909708023071, + "loss_ce": 0.004247721284627914, + "loss_iou": 0.314453125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 371215684, + "step": 3828 + }, + { + "epoch": 0.37436448963629254, + "grad_norm": 4.950981738182513, + "learning_rate": 5e-05, + "loss": 0.1008, + "num_input_tokens_seen": 371312312, + "step": 3829 + }, + { + "epoch": 0.37436448963629254, + "loss": 0.10136884450912476, + "loss_ce": 0.0015763640403747559, + "loss_iou": 0.45703125, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 371312312, + "step": 3829 + }, + { + "epoch": 0.3744622604614783, + "grad_norm": 4.547395260183853, + "learning_rate": 5e-05, + "loss": 0.0925, + "num_input_tokens_seen": 371408672, + "step": 3830 + }, + { + "epoch": 0.3744622604614783, + "loss": 0.10114327818155289, + "loss_ce": 0.005249416455626488, + "loss_iou": 0.26953125, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 371408672, + "step": 3830 + }, + { + "epoch": 0.37456003128666404, + "grad_norm": 15.44467779650767, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 371505684, + "step": 3831 + }, + { + "epoch": 0.37456003128666404, + "loss": 0.0697142630815506, + "loss_ce": 0.004925444256514311, + "loss_iou": 0.345703125, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 371505684, + "step": 3831 + }, + { + "epoch": 0.37465780211184985, + "grad_norm": 22.967164937102385, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 371603336, + "step": 3832 + }, + { + "epoch": 0.37465780211184985, + "loss": 0.0846615806221962, + "loss_ce": 0.0028592138551175594, + "loss_iou": 0.33984375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 371603336, + "step": 3832 + }, + { + "epoch": 0.3747555729370356, + "grad_norm": 5.208611429199049, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 371700308, + "step": 3833 + }, + { + "epoch": 0.3747555729370356, + "loss": 0.09508373588323593, + "loss_ce": 0.005850335583090782, + "loss_iou": 0.33203125, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 371700308, + "step": 3833 + }, + { + "epoch": 0.37485334376222135, + "grad_norm": 3.9696363533790002, + "learning_rate": 5e-05, + "loss": 0.0714, + "num_input_tokens_seen": 371797148, + "step": 3834 + }, + { + "epoch": 0.37485334376222135, + "loss": 0.06847064197063446, + "loss_ce": 0.0035902715753763914, + "loss_iou": 0.322265625, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 371797148, + "step": 3834 + }, + { + "epoch": 0.3749511145874071, + "grad_norm": 12.352916975590096, + "learning_rate": 5e-05, + "loss": 0.0812, + "num_input_tokens_seen": 371894248, + "step": 3835 + }, + { + "epoch": 0.3749511145874071, + "loss": 0.06241486221551895, + "loss_ce": 0.009009098634123802, + "loss_iou": 0.267578125, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 371894248, + "step": 3835 + }, + { + "epoch": 0.3750488854125929, + "grad_norm": 5.09188509222152, + "learning_rate": 5e-05, + "loss": 0.1185, + "num_input_tokens_seen": 371991872, + "step": 3836 + }, + { + "epoch": 0.3750488854125929, + "loss": 0.11088347434997559, + "loss_ce": 0.005658866837620735, + "loss_iou": 0.416015625, + "loss_num": 0.02099609375, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 371991872, + "step": 3836 + }, + { + "epoch": 0.37514665623777865, + "grad_norm": 3.751962433400241, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 372088660, + "step": 3837 + }, + { + "epoch": 0.37514665623777865, + "loss": 0.06467227637767792, + "loss_ce": 0.00662783719599247, + "loss_iou": 0.388671875, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 372088660, + "step": 3837 + }, + { + "epoch": 0.3752444270629644, + "grad_norm": 3.788406921482721, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 372184492, + "step": 3838 + }, + { + "epoch": 0.3752444270629644, + "loss": 0.08210690319538116, + "loss_ce": 0.0047448426485061646, + "loss_iou": 0.40234375, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 372184492, + "step": 3838 + }, + { + "epoch": 0.37534219788815015, + "grad_norm": 12.375745010424641, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 372281312, + "step": 3839 + }, + { + "epoch": 0.37534219788815015, + "loss": 0.12818391621112823, + "loss_ce": 0.0057779112830758095, + "loss_iou": 0.255859375, + "loss_num": 0.0244140625, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 372281312, + "step": 3839 + }, + { + "epoch": 0.37543996871333596, + "grad_norm": 5.089878881436807, + "learning_rate": 5e-05, + "loss": 0.1009, + "num_input_tokens_seen": 372378764, + "step": 3840 + }, + { + "epoch": 0.37543996871333596, + "loss": 0.13926661014556885, + "loss_ce": 0.007979982532560825, + "loss_iou": 0.2216796875, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 372378764, + "step": 3840 + }, + { + "epoch": 0.3755377395385217, + "grad_norm": 2.9593036862145716, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 372475604, + "step": 3841 + }, + { + "epoch": 0.3755377395385217, + "loss": 0.06451082974672318, + "loss_ce": 0.004345422610640526, + "loss_iou": 0.1357421875, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 372475604, + "step": 3841 + }, + { + "epoch": 0.37563551036370746, + "grad_norm": 2.71010712517801, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 372572580, + "step": 3842 + }, + { + "epoch": 0.37563551036370746, + "loss": 0.05184761807322502, + "loss_ce": 0.007578055374324322, + "loss_iou": 0.357421875, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 372572580, + "step": 3842 + }, + { + "epoch": 0.3757332811888932, + "grad_norm": 8.349438210469627, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 372669256, + "step": 3843 + }, + { + "epoch": 0.3757332811888932, + "loss": 0.08096586912870407, + "loss_ce": 0.00768553139641881, + "loss_iou": 0.197265625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 372669256, + "step": 3843 + }, + { + "epoch": 0.375831052014079, + "grad_norm": 26.882681666136254, + "learning_rate": 5e-05, + "loss": 0.1062, + "num_input_tokens_seen": 372766492, + "step": 3844 + }, + { + "epoch": 0.375831052014079, + "loss": 0.1305827796459198, + "loss_ce": 0.010755505412817001, + "loss_iou": 0.1689453125, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 372766492, + "step": 3844 + }, + { + "epoch": 0.37592882283926476, + "grad_norm": 9.871296021130956, + "learning_rate": 5e-05, + "loss": 0.0954, + "num_input_tokens_seen": 372863120, + "step": 3845 + }, + { + "epoch": 0.37592882283926476, + "loss": 0.04457320645451546, + "loss_ce": 0.005586998537182808, + "loss_iou": 0.38671875, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 372863120, + "step": 3845 + }, + { + "epoch": 0.3760265936644505, + "grad_norm": 8.164546884077629, + "learning_rate": 5e-05, + "loss": 0.1139, + "num_input_tokens_seen": 372959964, + "step": 3846 + }, + { + "epoch": 0.3760265936644505, + "loss": 0.08491258323192596, + "loss_ce": 0.004559796303510666, + "loss_iou": 0.32421875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 372959964, + "step": 3846 + }, + { + "epoch": 0.37612436448963626, + "grad_norm": 5.655010382176651, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 373056840, + "step": 3847 + }, + { + "epoch": 0.37612436448963626, + "loss": 0.04348728805780411, + "loss_ce": 0.0036275191232562065, + "loss_iou": 0.2294921875, + "loss_num": 0.00799560546875, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 373056840, + "step": 3847 + }, + { + "epoch": 0.37622213531482207, + "grad_norm": 7.9686854482655605, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 373153412, + "step": 3848 + }, + { + "epoch": 0.37622213531482207, + "loss": 0.11327514052391052, + "loss_ce": 0.006616204511374235, + "loss_iou": 0.3046875, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 373153412, + "step": 3848 + }, + { + "epoch": 0.3763199061400078, + "grad_norm": 27.559416707308124, + "learning_rate": 5e-05, + "loss": 0.0821, + "num_input_tokens_seen": 373250460, + "step": 3849 + }, + { + "epoch": 0.3763199061400078, + "loss": 0.10751562565565109, + "loss_ce": 0.006502438336610794, + "loss_iou": 0.251953125, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 373250460, + "step": 3849 + }, + { + "epoch": 0.37641767696519357, + "grad_norm": 40.24641825817644, + "learning_rate": 5e-05, + "loss": 0.0714, + "num_input_tokens_seen": 373346896, + "step": 3850 + }, + { + "epoch": 0.37641767696519357, + "loss": 0.06191667169332504, + "loss_ce": 0.007297835312783718, + "loss_iou": 0.265625, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 373346896, + "step": 3850 + }, + { + "epoch": 0.3765154477903794, + "grad_norm": 53.644343075700476, + "learning_rate": 5e-05, + "loss": 0.1033, + "num_input_tokens_seen": 373443804, + "step": 3851 + }, + { + "epoch": 0.3765154477903794, + "loss": 0.0936824157834053, + "loss_ce": 0.009850628674030304, + "loss_iou": 0.328125, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 373443804, + "step": 3851 + }, + { + "epoch": 0.3766132186155651, + "grad_norm": 42.32295092826624, + "learning_rate": 5e-05, + "loss": 0.1004, + "num_input_tokens_seen": 373540896, + "step": 3852 + }, + { + "epoch": 0.3766132186155651, + "loss": 0.0730813592672348, + "loss_ce": 0.009406433440744877, + "loss_iou": 0.26953125, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 373540896, + "step": 3852 + }, + { + "epoch": 0.3767109894407509, + "grad_norm": 7.174652258407042, + "learning_rate": 5e-05, + "loss": 0.0595, + "num_input_tokens_seen": 373638792, + "step": 3853 + }, + { + "epoch": 0.3767109894407509, + "loss": 0.048930730670690536, + "loss_ce": 0.0031238480005413294, + "loss_iou": 0.373046875, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 373638792, + "step": 3853 + }, + { + "epoch": 0.3768087602659366, + "grad_norm": 6.429825189304149, + "learning_rate": 5e-05, + "loss": 0.1256, + "num_input_tokens_seen": 373736560, + "step": 3854 + }, + { + "epoch": 0.3768087602659366, + "loss": 0.1288284808397293, + "loss_ce": 0.0069412728771567345, + "loss_iou": 0.4296875, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 373736560, + "step": 3854 + }, + { + "epoch": 0.37690653109112243, + "grad_norm": 7.684405109502354, + "learning_rate": 5e-05, + "loss": 0.1289, + "num_input_tokens_seen": 373834104, + "step": 3855 + }, + { + "epoch": 0.37690653109112243, + "loss": 0.17511287331581116, + "loss_ce": 0.01047816127538681, + "loss_iou": 0.376953125, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 373834104, + "step": 3855 + }, + { + "epoch": 0.3770043019163082, + "grad_norm": 4.8792865269759975, + "learning_rate": 5e-05, + "loss": 0.0954, + "num_input_tokens_seen": 373931744, + "step": 3856 + }, + { + "epoch": 0.3770043019163082, + "loss": 0.11951257288455963, + "loss_ce": 0.01939966343343258, + "loss_iou": 0.306640625, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 373931744, + "step": 3856 + }, + { + "epoch": 0.37710207274149393, + "grad_norm": 7.387250422070433, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 374027636, + "step": 3857 + }, + { + "epoch": 0.37710207274149393, + "loss": 0.11171014606952667, + "loss_ce": 0.002457219874486327, + "loss_iou": 0.421875, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 374027636, + "step": 3857 + }, + { + "epoch": 0.3771998435666797, + "grad_norm": 3.2128938749402294, + "learning_rate": 5e-05, + "loss": 0.0571, + "num_input_tokens_seen": 374123692, + "step": 3858 + }, + { + "epoch": 0.3771998435666797, + "loss": 0.05579373985528946, + "loss_ce": 0.006118755787611008, + "loss_iou": 0.1943359375, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 374123692, + "step": 3858 + }, + { + "epoch": 0.3772976143918655, + "grad_norm": 5.551971347022423, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 374220640, + "step": 3859 + }, + { + "epoch": 0.3772976143918655, + "loss": 0.06602773070335388, + "loss_ce": 0.0060912142507731915, + "loss_iou": 0.33984375, + "loss_num": 0.011962890625, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 374220640, + "step": 3859 + }, + { + "epoch": 0.37739538521705124, + "grad_norm": 6.614286388793839, + "learning_rate": 5e-05, + "loss": 0.1095, + "num_input_tokens_seen": 374318048, + "step": 3860 + }, + { + "epoch": 0.37739538521705124, + "loss": 0.10838451236486435, + "loss_ce": 0.005265613552182913, + "loss_iou": 0.423828125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 374318048, + "step": 3860 + }, + { + "epoch": 0.377493156042237, + "grad_norm": 8.113956716139237, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 374415976, + "step": 3861 + }, + { + "epoch": 0.377493156042237, + "loss": 0.08892247825860977, + "loss_ce": 0.005456901155412197, + "loss_iou": 0.37890625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 374415976, + "step": 3861 + }, + { + "epoch": 0.37759092686742274, + "grad_norm": 3.7687277976191376, + "learning_rate": 5e-05, + "loss": 0.0961, + "num_input_tokens_seen": 374512572, + "step": 3862 + }, + { + "epoch": 0.37759092686742274, + "loss": 0.10645558685064316, + "loss_ce": 0.008173728361725807, + "loss_iou": 0.259765625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 374512572, + "step": 3862 + }, + { + "epoch": 0.37768869769260854, + "grad_norm": 5.758070167878938, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 374610200, + "step": 3863 + }, + { + "epoch": 0.37768869769260854, + "loss": 0.11447720229625702, + "loss_ce": 0.004171422217041254, + "loss_iou": 0.39453125, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 374610200, + "step": 3863 + }, + { + "epoch": 0.3777864685177943, + "grad_norm": 4.0396233887377315, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 374705816, + "step": 3864 + }, + { + "epoch": 0.3777864685177943, + "loss": 0.06765595823526382, + "loss_ce": 0.008306900039315224, + "loss_iou": 0.166015625, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 374705816, + "step": 3864 + }, + { + "epoch": 0.37788423934298004, + "grad_norm": 4.595147483435564, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 374802952, + "step": 3865 + }, + { + "epoch": 0.37788423934298004, + "loss": 0.10684431344270706, + "loss_ce": 0.007280717603862286, + "loss_iou": 0.2890625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 374802952, + "step": 3865 + }, + { + "epoch": 0.3779820101681658, + "grad_norm": 2.611026465927608, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 374898700, + "step": 3866 + }, + { + "epoch": 0.3779820101681658, + "loss": 0.05776434391736984, + "loss_ce": 0.004297545645385981, + "loss_iou": 0.189453125, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 374898700, + "step": 3866 + }, + { + "epoch": 0.3780797809933516, + "grad_norm": 3.302636051378312, + "learning_rate": 5e-05, + "loss": 0.0743, + "num_input_tokens_seen": 374996168, + "step": 3867 + }, + { + "epoch": 0.3780797809933516, + "loss": 0.0915830135345459, + "loss_ce": 0.01020789984613657, + "loss_iou": 0.337890625, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 374996168, + "step": 3867 + }, + { + "epoch": 0.37817755181853735, + "grad_norm": 3.207245418546759, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 375092372, + "step": 3868 + }, + { + "epoch": 0.37817755181853735, + "loss": 0.04766763746738434, + "loss_ce": 0.005736487451940775, + "loss_iou": 0.29296875, + "loss_num": 0.00836181640625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 375092372, + "step": 3868 + }, + { + "epoch": 0.3782753226437231, + "grad_norm": 3.634005234875472, + "learning_rate": 5e-05, + "loss": 0.0879, + "num_input_tokens_seen": 375189884, + "step": 3869 + }, + { + "epoch": 0.3782753226437231, + "loss": 0.08076977729797363, + "loss_ce": 0.006489988416433334, + "loss_iou": 0.3515625, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 375189884, + "step": 3869 + }, + { + "epoch": 0.37837309346890885, + "grad_norm": 5.83329284690976, + "learning_rate": 5e-05, + "loss": 0.1137, + "num_input_tokens_seen": 375286580, + "step": 3870 + }, + { + "epoch": 0.37837309346890885, + "loss": 0.12657037377357483, + "loss_ce": 0.004316951148211956, + "loss_iou": 0.3515625, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 375286580, + "step": 3870 + }, + { + "epoch": 0.37847086429409466, + "grad_norm": 8.68392202221912, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 375383868, + "step": 3871 + }, + { + "epoch": 0.37847086429409466, + "loss": 0.09579340368509293, + "loss_ce": 0.005888616666197777, + "loss_iou": 0.330078125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 375383868, + "step": 3871 + }, + { + "epoch": 0.3785686351192804, + "grad_norm": 2.7061508975533184, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 375480660, + "step": 3872 + }, + { + "epoch": 0.3785686351192804, + "loss": 0.06338360160589218, + "loss_ce": 0.006163141690194607, + "loss_iou": 0.33984375, + "loss_num": 0.011474609375, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 375480660, + "step": 3872 + }, + { + "epoch": 0.37866640594446616, + "grad_norm": 6.329602046870524, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 375578280, + "step": 3873 + }, + { + "epoch": 0.37866640594446616, + "loss": 0.09773564338684082, + "loss_ce": 0.0045883674174547195, + "loss_iou": 0.251953125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 375578280, + "step": 3873 + }, + { + "epoch": 0.37876417676965196, + "grad_norm": 11.047488379635741, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 375675540, + "step": 3874 + }, + { + "epoch": 0.37876417676965196, + "loss": 0.10969731211662292, + "loss_ce": 0.004243818111717701, + "loss_iou": 0.326171875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 375675540, + "step": 3874 + }, + { + "epoch": 0.3788619475948377, + "grad_norm": 4.092314998835859, + "learning_rate": 5e-05, + "loss": 0.0985, + "num_input_tokens_seen": 375772340, + "step": 3875 + }, + { + "epoch": 0.3788619475948377, + "loss": 0.07881223410367966, + "loss_ce": 0.0076757632195949554, + "loss_iou": 0.359375, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 375772340, + "step": 3875 + }, + { + "epoch": 0.37895971842002346, + "grad_norm": 3.908542064310717, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 375869244, + "step": 3876 + }, + { + "epoch": 0.37895971842002346, + "loss": 0.07690128684043884, + "loss_ce": 0.005121084861457348, + "loss_iou": 0.27734375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 375869244, + "step": 3876 + }, + { + "epoch": 0.3790574892452092, + "grad_norm": 12.793654401589858, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 375965700, + "step": 3877 + }, + { + "epoch": 0.3790574892452092, + "loss": 0.1171378567814827, + "loss_ce": 0.0034751398488879204, + "loss_iou": 0.287109375, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 375965700, + "step": 3877 + }, + { + "epoch": 0.379155260070395, + "grad_norm": 16.939235020959288, + "learning_rate": 5e-05, + "loss": 0.1024, + "num_input_tokens_seen": 376062692, + "step": 3878 + }, + { + "epoch": 0.379155260070395, + "loss": 0.08122748136520386, + "loss_ce": 0.011143862269818783, + "loss_iou": 0.306640625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 376062692, + "step": 3878 + }, + { + "epoch": 0.37925303089558077, + "grad_norm": 13.232638161659654, + "learning_rate": 5e-05, + "loss": 0.1093, + "num_input_tokens_seen": 376159728, + "step": 3879 + }, + { + "epoch": 0.37925303089558077, + "loss": 0.11183314025402069, + "loss_ce": 0.006272831000387669, + "loss_iou": 0.3984375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 376159728, + "step": 3879 + }, + { + "epoch": 0.3793508017207665, + "grad_norm": 4.435099386413398, + "learning_rate": 5e-05, + "loss": 0.09, + "num_input_tokens_seen": 376256272, + "step": 3880 + }, + { + "epoch": 0.3793508017207665, + "loss": 0.09764908254146576, + "loss_ce": 0.01030777022242546, + "loss_iou": 0.33984375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 376256272, + "step": 3880 + }, + { + "epoch": 0.37944857254595227, + "grad_norm": 6.77882814250885, + "learning_rate": 5e-05, + "loss": 0.0947, + "num_input_tokens_seen": 376352280, + "step": 3881 + }, + { + "epoch": 0.37944857254595227, + "loss": 0.056649889796972275, + "loss_ce": 0.005914417095482349, + "loss_iou": 0.2431640625, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 376352280, + "step": 3881 + }, + { + "epoch": 0.3795463433711381, + "grad_norm": 3.030840060621757, + "learning_rate": 5e-05, + "loss": 0.0708, + "num_input_tokens_seen": 376448652, + "step": 3882 + }, + { + "epoch": 0.3795463433711381, + "loss": 0.09412112832069397, + "loss_ce": 0.010838663205504417, + "loss_iou": 0.2041015625, + "loss_num": 0.0166015625, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 376448652, + "step": 3882 + }, + { + "epoch": 0.3796441141963238, + "grad_norm": 4.034847965771225, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 376546244, + "step": 3883 + }, + { + "epoch": 0.3796441141963238, + "loss": 0.05964329093694687, + "loss_ce": 0.003048444166779518, + "loss_iou": 0.302734375, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 376546244, + "step": 3883 + }, + { + "epoch": 0.3797418850215096, + "grad_norm": 5.889319132941773, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 376643344, + "step": 3884 + }, + { + "epoch": 0.3797418850215096, + "loss": 0.0868135467171669, + "loss_ce": 0.004019351210445166, + "loss_iou": 0.248046875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 376643344, + "step": 3884 + }, + { + "epoch": 0.3798396558466953, + "grad_norm": 13.59492865662454, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 376740860, + "step": 3885 + }, + { + "epoch": 0.3798396558466953, + "loss": 0.05468267202377319, + "loss_ce": 0.004206595476716757, + "loss_iou": 0.38671875, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 376740860, + "step": 3885 + }, + { + "epoch": 0.37993742667188113, + "grad_norm": 23.337690194490307, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 376837900, + "step": 3886 + }, + { + "epoch": 0.37993742667188113, + "loss": 0.05084781348705292, + "loss_ce": 0.0060174912214279175, + "loss_iou": 0.310546875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 376837900, + "step": 3886 + }, + { + "epoch": 0.3800351974970669, + "grad_norm": 17.425016611724914, + "learning_rate": 5e-05, + "loss": 0.0542, + "num_input_tokens_seen": 376933824, + "step": 3887 + }, + { + "epoch": 0.3800351974970669, + "loss": 0.05564822629094124, + "loss_ce": 0.003211397211998701, + "loss_iou": 0.3125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 376933824, + "step": 3887 + }, + { + "epoch": 0.38013296832225263, + "grad_norm": 11.214180303847671, + "learning_rate": 5e-05, + "loss": 0.0496, + "num_input_tokens_seen": 377031536, + "step": 3888 + }, + { + "epoch": 0.38013296832225263, + "loss": 0.038709577172994614, + "loss_ce": 0.0035075508058071136, + "loss_iou": 0.310546875, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 377031536, + "step": 3888 + }, + { + "epoch": 0.3802307391474384, + "grad_norm": 18.857711968716085, + "learning_rate": 5e-05, + "loss": 0.1118, + "num_input_tokens_seen": 377128072, + "step": 3889 + }, + { + "epoch": 0.3802307391474384, + "loss": 0.11345478892326355, + "loss_ce": 0.008321735076606274, + "loss_iou": 0.42578125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 377128072, + "step": 3889 + }, + { + "epoch": 0.3803285099726242, + "grad_norm": 12.469132959649217, + "learning_rate": 5e-05, + "loss": 0.0906, + "num_input_tokens_seen": 377225232, + "step": 3890 + }, + { + "epoch": 0.3803285099726242, + "loss": 0.08581391721963882, + "loss_ce": 0.005621357820928097, + "loss_iou": 0.3203125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 377225232, + "step": 3890 + }, + { + "epoch": 0.38042628079780993, + "grad_norm": 4.661333315750101, + "learning_rate": 5e-05, + "loss": 0.0976, + "num_input_tokens_seen": 377322356, + "step": 3891 + }, + { + "epoch": 0.38042628079780993, + "loss": 0.10242331773042679, + "loss_ce": 0.010824805125594139, + "loss_iou": 0.2578125, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 377322356, + "step": 3891 + }, + { + "epoch": 0.3805240516229957, + "grad_norm": 5.651598770479649, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 377419588, + "step": 3892 + }, + { + "epoch": 0.3805240516229957, + "loss": 0.11500371247529984, + "loss_ce": 0.004881036467850208, + "loss_iou": 0.2275390625, + "loss_num": 0.02197265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 377419588, + "step": 3892 + }, + { + "epoch": 0.38062182244818143, + "grad_norm": 10.486543818210809, + "learning_rate": 5e-05, + "loss": 0.083, + "num_input_tokens_seen": 377517080, + "step": 3893 + }, + { + "epoch": 0.38062182244818143, + "loss": 0.0981755405664444, + "loss_ce": 0.008331796154379845, + "loss_iou": 0.40234375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 377517080, + "step": 3893 + }, + { + "epoch": 0.38071959327336724, + "grad_norm": 8.84464068566633, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 377613812, + "step": 3894 + }, + { + "epoch": 0.38071959327336724, + "loss": 0.04012525826692581, + "loss_ce": 0.004320509731769562, + "loss_iou": 0.1884765625, + "loss_num": 0.00714111328125, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 377613812, + "step": 3894 + }, + { + "epoch": 0.380817364098553, + "grad_norm": 11.876773844558219, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 377711300, + "step": 3895 + }, + { + "epoch": 0.380817364098553, + "loss": 0.08301396667957306, + "loss_ce": 0.004568533971905708, + "loss_iou": 0.388671875, + "loss_num": 0.015625, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 377711300, + "step": 3895 + }, + { + "epoch": 0.38091513492373874, + "grad_norm": 15.26708017707783, + "learning_rate": 5e-05, + "loss": 0.056, + "num_input_tokens_seen": 377809128, + "step": 3896 + }, + { + "epoch": 0.38091513492373874, + "loss": 0.055004894733428955, + "loss_ce": 0.004467780701816082, + "loss_iou": 0.33984375, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 377809128, + "step": 3896 + }, + { + "epoch": 0.38101290574892455, + "grad_norm": 16.68970663060912, + "learning_rate": 5e-05, + "loss": 0.0758, + "num_input_tokens_seen": 377906944, + "step": 3897 + }, + { + "epoch": 0.38101290574892455, + "loss": 0.09723032265901566, + "loss_ce": 0.0075391605496406555, + "loss_iou": 0.3984375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 377906944, + "step": 3897 + }, + { + "epoch": 0.3811106765741103, + "grad_norm": 10.487611012469497, + "learning_rate": 5e-05, + "loss": 0.0619, + "num_input_tokens_seen": 378003604, + "step": 3898 + }, + { + "epoch": 0.3811106765741103, + "loss": 0.07996786385774612, + "loss_ce": 0.0018123490735888481, + "loss_iou": 0.40234375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 378003604, + "step": 3898 + }, + { + "epoch": 0.38120844739929605, + "grad_norm": 4.830699074799542, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 378100772, + "step": 3899 + }, + { + "epoch": 0.38120844739929605, + "loss": 0.08191598951816559, + "loss_ce": 0.0032264171168208122, + "loss_iou": 0.263671875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 378100772, + "step": 3899 + }, + { + "epoch": 0.3813062182244818, + "grad_norm": 4.666583058657591, + "learning_rate": 5e-05, + "loss": 0.0921, + "num_input_tokens_seen": 378197612, + "step": 3900 + }, + { + "epoch": 0.3813062182244818, + "loss": 0.05972347408533096, + "loss_ce": 0.0030752213206142187, + "loss_iou": 0.2431640625, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 378197612, + "step": 3900 + }, + { + "epoch": 0.3814039890496676, + "grad_norm": 8.983746912841875, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 378294884, + "step": 3901 + }, + { + "epoch": 0.3814039890496676, + "loss": 0.03397141396999359, + "loss_ce": 0.002774822060018778, + "loss_iou": 0.357421875, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 378294884, + "step": 3901 + }, + { + "epoch": 0.38150175987485335, + "grad_norm": 9.674686750512215, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 378391308, + "step": 3902 + }, + { + "epoch": 0.38150175987485335, + "loss": 0.08400563150644302, + "loss_ce": 0.008383072912693024, + "loss_iou": 0.322265625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 378391308, + "step": 3902 + }, + { + "epoch": 0.3815995307000391, + "grad_norm": 21.45087424911894, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 378488476, + "step": 3903 + }, + { + "epoch": 0.3815995307000391, + "loss": 0.09069547057151794, + "loss_ce": 0.005520915612578392, + "loss_iou": 0.283203125, + "loss_num": 0.01708984375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 378488476, + "step": 3903 + }, + { + "epoch": 0.38169730152522485, + "grad_norm": 10.040640252329055, + "learning_rate": 5e-05, + "loss": 0.1135, + "num_input_tokens_seen": 378585496, + "step": 3904 + }, + { + "epoch": 0.38169730152522485, + "loss": 0.08845037966966629, + "loss_ce": 0.006174991838634014, + "loss_iou": 0.296875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 378585496, + "step": 3904 + }, + { + "epoch": 0.38179507235041066, + "grad_norm": 10.165013442570377, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 378682292, + "step": 3905 + }, + { + "epoch": 0.38179507235041066, + "loss": 0.0614183247089386, + "loss_ce": 0.0038163980934768915, + "loss_iou": 0.3515625, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 378682292, + "step": 3905 + }, + { + "epoch": 0.3818928431755964, + "grad_norm": 5.410968069281227, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 378778816, + "step": 3906 + }, + { + "epoch": 0.3818928431755964, + "loss": 0.07653555274009705, + "loss_ce": 0.005566922947764397, + "loss_iou": 0.2041015625, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 378778816, + "step": 3906 + }, + { + "epoch": 0.38199061400078216, + "grad_norm": 4.700221718353901, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 378875704, + "step": 3907 + }, + { + "epoch": 0.38199061400078216, + "loss": 0.07657110691070557, + "loss_ce": 0.0016504486557096243, + "loss_iou": 0.2451171875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 378875704, + "step": 3907 + }, + { + "epoch": 0.3820883848259679, + "grad_norm": 11.457775350999052, + "learning_rate": 5e-05, + "loss": 0.1058, + "num_input_tokens_seen": 378973764, + "step": 3908 + }, + { + "epoch": 0.3820883848259679, + "loss": 0.09504671394824982, + "loss_ce": 0.009719569236040115, + "loss_iou": 0.359375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 378973764, + "step": 3908 + }, + { + "epoch": 0.3821861556511537, + "grad_norm": 18.994324699127006, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 379070868, + "step": 3909 + }, + { + "epoch": 0.3821861556511537, + "loss": 0.06171302869915962, + "loss_ce": 0.002325821667909622, + "loss_iou": 0.337890625, + "loss_num": 0.01190185546875, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 379070868, + "step": 3909 + }, + { + "epoch": 0.38228392647633946, + "grad_norm": 5.631075257670106, + "learning_rate": 5e-05, + "loss": 0.1164, + "num_input_tokens_seen": 379167948, + "step": 3910 + }, + { + "epoch": 0.38228392647633946, + "loss": 0.12080994248390198, + "loss_ce": 0.007543941494077444, + "loss_iou": 0.365234375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 379167948, + "step": 3910 + }, + { + "epoch": 0.3823816973015252, + "grad_norm": 3.172067559042034, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 379265260, + "step": 3911 + }, + { + "epoch": 0.3823816973015252, + "loss": 0.07622544467449188, + "loss_ce": 0.006538549903780222, + "loss_iou": 0.44140625, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 379265260, + "step": 3911 + }, + { + "epoch": 0.38247946812671096, + "grad_norm": 4.636431702091922, + "learning_rate": 5e-05, + "loss": 0.0862, + "num_input_tokens_seen": 379362156, + "step": 3912 + }, + { + "epoch": 0.38247946812671096, + "loss": 0.0962451696395874, + "loss_ce": 0.005562190897762775, + "loss_iou": 0.3203125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 379362156, + "step": 3912 + }, + { + "epoch": 0.38257723895189677, + "grad_norm": 8.616183460655993, + "learning_rate": 5e-05, + "loss": 0.112, + "num_input_tokens_seen": 379459516, + "step": 3913 + }, + { + "epoch": 0.38257723895189677, + "loss": 0.08371568471193314, + "loss_ce": 0.005148178897798061, + "loss_iou": 0.369140625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 379459516, + "step": 3913 + }, + { + "epoch": 0.3826750097770825, + "grad_norm": 5.459414249942891, + "learning_rate": 5e-05, + "loss": 0.0966, + "num_input_tokens_seen": 379556180, + "step": 3914 + }, + { + "epoch": 0.3826750097770825, + "loss": 0.058808334171772, + "loss_ce": 0.006470688618719578, + "loss_iou": 0.37109375, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 379556180, + "step": 3914 + }, + { + "epoch": 0.38277278060226827, + "grad_norm": 9.16240618788384, + "learning_rate": 5e-05, + "loss": 0.0918, + "num_input_tokens_seen": 379653532, + "step": 3915 + }, + { + "epoch": 0.38277278060226827, + "loss": 0.07558834552764893, + "loss_ce": 0.004604456480592489, + "loss_iou": 0.2578125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 379653532, + "step": 3915 + }, + { + "epoch": 0.382870551427454, + "grad_norm": 6.002126923755894, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 379750336, + "step": 3916 + }, + { + "epoch": 0.382870551427454, + "loss": 0.052040159702301025, + "loss_ce": 0.0054779681377112865, + "loss_iou": 0.294921875, + "loss_num": 0.00927734375, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 379750336, + "step": 3916 + }, + { + "epoch": 0.3829683222526398, + "grad_norm": 11.800427773670865, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 379847580, + "step": 3917 + }, + { + "epoch": 0.3829683222526398, + "loss": 0.04509279131889343, + "loss_ce": 0.005145280621945858, + "loss_iou": 0.353515625, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 379847580, + "step": 3917 + }, + { + "epoch": 0.3830660930778256, + "grad_norm": 9.0411646733457, + "learning_rate": 5e-05, + "loss": 0.1099, + "num_input_tokens_seen": 379944392, + "step": 3918 + }, + { + "epoch": 0.3830660930778256, + "loss": 0.09682382643222809, + "loss_ce": 0.004599713254719973, + "loss_iou": 0.32421875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 379944392, + "step": 3918 + }, + { + "epoch": 0.3831638639030113, + "grad_norm": 5.120433540325392, + "learning_rate": 5e-05, + "loss": 0.1202, + "num_input_tokens_seen": 380041808, + "step": 3919 + }, + { + "epoch": 0.3831638639030113, + "loss": 0.08659088611602783, + "loss_ce": 0.006024481263011694, + "loss_iou": 0.34375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 380041808, + "step": 3919 + }, + { + "epoch": 0.38326163472819713, + "grad_norm": 5.253207344740657, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 380138936, + "step": 3920 + }, + { + "epoch": 0.38326163472819713, + "loss": 0.07864272594451904, + "loss_ce": 0.0029591249767690897, + "loss_iou": 0.2314453125, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 380138936, + "step": 3920 + }, + { + "epoch": 0.3833594055533829, + "grad_norm": 9.0915117892332, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 380236204, + "step": 3921 + }, + { + "epoch": 0.3833594055533829, + "loss": 0.13049879670143127, + "loss_ce": 0.006582176312804222, + "loss_iou": 0.2275390625, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 380236204, + "step": 3921 + }, + { + "epoch": 0.38345717637856863, + "grad_norm": 2.4801333104346788, + "learning_rate": 5e-05, + "loss": 0.0598, + "num_input_tokens_seen": 380333328, + "step": 3922 + }, + { + "epoch": 0.38345717637856863, + "loss": 0.04349786415696144, + "loss_ce": 0.004038637969642878, + "loss_iou": 0.248046875, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 380333328, + "step": 3922 + }, + { + "epoch": 0.3835549472037544, + "grad_norm": 9.24713898856668, + "learning_rate": 5e-05, + "loss": 0.0959, + "num_input_tokens_seen": 380430224, + "step": 3923 + }, + { + "epoch": 0.3835549472037544, + "loss": 0.11305545270442963, + "loss_ce": 0.008128391578793526, + "loss_iou": 0.255859375, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 380430224, + "step": 3923 + }, + { + "epoch": 0.3836527180289402, + "grad_norm": 19.458547998499053, + "learning_rate": 5e-05, + "loss": 0.0877, + "num_input_tokens_seen": 380526676, + "step": 3924 + }, + { + "epoch": 0.3836527180289402, + "loss": 0.08978503942489624, + "loss_ce": 0.004711566027253866, + "loss_iou": 0.365234375, + "loss_num": 0.01708984375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 380526676, + "step": 3924 + }, + { + "epoch": 0.38375048885412594, + "grad_norm": 10.453106134774043, + "learning_rate": 5e-05, + "loss": 0.0758, + "num_input_tokens_seen": 380623436, + "step": 3925 + }, + { + "epoch": 0.38375048885412594, + "loss": 0.06527288258075714, + "loss_ce": 0.0027118418365716934, + "loss_iou": 0.3828125, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 380623436, + "step": 3925 + }, + { + "epoch": 0.3838482596793117, + "grad_norm": 6.52212022925353, + "learning_rate": 5e-05, + "loss": 0.1153, + "num_input_tokens_seen": 380719780, + "step": 3926 + }, + { + "epoch": 0.3838482596793117, + "loss": 0.12174510210752487, + "loss_ce": 0.005183213856071234, + "loss_iou": 0.31640625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 380719780, + "step": 3926 + }, + { + "epoch": 0.38394603050449744, + "grad_norm": 5.967632844732809, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 380815656, + "step": 3927 + }, + { + "epoch": 0.38394603050449744, + "loss": 0.07628589123487473, + "loss_ce": 0.006721073761582375, + "loss_iou": 0.1953125, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 380815656, + "step": 3927 + }, + { + "epoch": 0.38404380132968324, + "grad_norm": 8.645835689046319, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 380912476, + "step": 3928 + }, + { + "epoch": 0.38404380132968324, + "loss": 0.05638277158141136, + "loss_ce": 0.008432026952505112, + "loss_iou": 0.1923828125, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 380912476, + "step": 3928 + }, + { + "epoch": 0.384141572154869, + "grad_norm": 5.997295995491209, + "learning_rate": 5e-05, + "loss": 0.0615, + "num_input_tokens_seen": 381010316, + "step": 3929 + }, + { + "epoch": 0.384141572154869, + "loss": 0.06053248047828674, + "loss_ce": 0.005921275354921818, + "loss_iou": 0.263671875, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 381010316, + "step": 3929 + }, + { + "epoch": 0.38423934298005474, + "grad_norm": 3.35369891553658, + "learning_rate": 5e-05, + "loss": 0.0911, + "num_input_tokens_seen": 381107500, + "step": 3930 + }, + { + "epoch": 0.38423934298005474, + "loss": 0.06899741291999817, + "loss_ce": 0.0044679902493953705, + "loss_iou": 0.259765625, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 381107500, + "step": 3930 + }, + { + "epoch": 0.3843371138052405, + "grad_norm": 4.813704151030516, + "learning_rate": 5e-05, + "loss": 0.0588, + "num_input_tokens_seen": 381203712, + "step": 3931 + }, + { + "epoch": 0.3843371138052405, + "loss": 0.05648248642683029, + "loss_ce": 0.005457097664475441, + "loss_iou": 0.248046875, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 381203712, + "step": 3931 + }, + { + "epoch": 0.3844348846304263, + "grad_norm": 7.606948267060684, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 381300924, + "step": 3932 + }, + { + "epoch": 0.3844348846304263, + "loss": 0.054943643510341644, + "loss_ce": 0.004471384920179844, + "loss_iou": 0.328125, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 381300924, + "step": 3932 + }, + { + "epoch": 0.38453265545561205, + "grad_norm": 11.694593505913707, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 381397956, + "step": 3933 + }, + { + "epoch": 0.38453265545561205, + "loss": 0.062263358384370804, + "loss_ce": 0.008430351503193378, + "loss_iou": 0.240234375, + "loss_num": 0.01080322265625, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 381397956, + "step": 3933 + }, + { + "epoch": 0.3846304262807978, + "grad_norm": 6.827475939257242, + "learning_rate": 5e-05, + "loss": 0.1216, + "num_input_tokens_seen": 381495804, + "step": 3934 + }, + { + "epoch": 0.3846304262807978, + "loss": 0.12386111170053482, + "loss_ce": 0.005849631503224373, + "loss_iou": 0.279296875, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 381495804, + "step": 3934 + }, + { + "epoch": 0.38472819710598355, + "grad_norm": 6.417567386964139, + "learning_rate": 5e-05, + "loss": 0.1194, + "num_input_tokens_seen": 381592444, + "step": 3935 + }, + { + "epoch": 0.38472819710598355, + "loss": 0.14390358328819275, + "loss_ce": 0.0030039160046726465, + "loss_iou": 0.380859375, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 381592444, + "step": 3935 + }, + { + "epoch": 0.38482596793116935, + "grad_norm": 3.5580702014746404, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 381689736, + "step": 3936 + }, + { + "epoch": 0.38482596793116935, + "loss": 0.060567792505025864, + "loss_ce": 0.005971847102046013, + "loss_iou": 0.326171875, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 381689736, + "step": 3936 + }, + { + "epoch": 0.3849237387563551, + "grad_norm": 14.542427075315205, + "learning_rate": 5e-05, + "loss": 0.0806, + "num_input_tokens_seen": 381786364, + "step": 3937 + }, + { + "epoch": 0.3849237387563551, + "loss": 0.07537160068750381, + "loss_ce": 0.003502707928419113, + "loss_iou": 0.34765625, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 381786364, + "step": 3937 + }, + { + "epoch": 0.38502150958154086, + "grad_norm": 12.777612719462462, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 381883192, + "step": 3938 + }, + { + "epoch": 0.38502150958154086, + "loss": 0.06215054541826248, + "loss_ce": 0.0028396304696798325, + "loss_iou": 0.3828125, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 381883192, + "step": 3938 + }, + { + "epoch": 0.3851192804067266, + "grad_norm": 3.0766679773172516, + "learning_rate": 5e-05, + "loss": 0.131, + "num_input_tokens_seen": 381979608, + "step": 3939 + }, + { + "epoch": 0.3851192804067266, + "loss": 0.1802922487258911, + "loss_ce": 0.007051591295748949, + "loss_iou": 0.28125, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 381979608, + "step": 3939 + }, + { + "epoch": 0.3852170512319124, + "grad_norm": 3.657442267800139, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 382076560, + "step": 3940 + }, + { + "epoch": 0.3852170512319124, + "loss": 0.0880824476480484, + "loss_ce": 0.006478445138782263, + "loss_iou": 0.25390625, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 382076560, + "step": 3940 + }, + { + "epoch": 0.38531482205709816, + "grad_norm": 12.072372700314325, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 382174000, + "step": 3941 + }, + { + "epoch": 0.38531482205709816, + "loss": 0.09512036293745041, + "loss_ce": 0.003109866753220558, + "loss_iou": 0.328125, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 382174000, + "step": 3941 + }, + { + "epoch": 0.3854125928822839, + "grad_norm": 12.785947306188332, + "learning_rate": 5e-05, + "loss": 0.0982, + "num_input_tokens_seen": 382272140, + "step": 3942 + }, + { + "epoch": 0.3854125928822839, + "loss": 0.09670262783765793, + "loss_ce": 0.0026779701001942158, + "loss_iou": 0.3515625, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 382272140, + "step": 3942 + }, + { + "epoch": 0.3855103637074697, + "grad_norm": 7.576430499316532, + "learning_rate": 5e-05, + "loss": 0.1176, + "num_input_tokens_seen": 382368828, + "step": 3943 + }, + { + "epoch": 0.3855103637074697, + "loss": 0.13524945080280304, + "loss_ce": 0.008906672708690166, + "loss_iou": 0.296875, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 382368828, + "step": 3943 + }, + { + "epoch": 0.38560813453265547, + "grad_norm": 10.475899919842961, + "learning_rate": 5e-05, + "loss": 0.0995, + "num_input_tokens_seen": 382465760, + "step": 3944 + }, + { + "epoch": 0.38560813453265547, + "loss": 0.10565701127052307, + "loss_ce": 0.009122276678681374, + "loss_iou": 0.2265625, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 382465760, + "step": 3944 + }, + { + "epoch": 0.3857059053578412, + "grad_norm": 17.55485995509905, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 382562048, + "step": 3945 + }, + { + "epoch": 0.3857059053578412, + "loss": 0.0782303661108017, + "loss_ce": 0.007242662366479635, + "loss_iou": 0.208984375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 382562048, + "step": 3945 + }, + { + "epoch": 0.38580367618302697, + "grad_norm": 9.216746485035724, + "learning_rate": 5e-05, + "loss": 0.0996, + "num_input_tokens_seen": 382658332, + "step": 3946 + }, + { + "epoch": 0.38580367618302697, + "loss": 0.08870857954025269, + "loss_ce": 0.0042054057121276855, + "loss_iou": 0.16796875, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 382658332, + "step": 3946 + }, + { + "epoch": 0.3859014470082128, + "grad_norm": 2.733892596777807, + "learning_rate": 5e-05, + "loss": 0.0631, + "num_input_tokens_seen": 382753988, + "step": 3947 + }, + { + "epoch": 0.3859014470082128, + "loss": 0.061507709324359894, + "loss_ce": 0.005645283497869968, + "loss_iou": 0.359375, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 382753988, + "step": 3947 + }, + { + "epoch": 0.3859992178333985, + "grad_norm": 10.607513700825317, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 382851504, + "step": 3948 + }, + { + "epoch": 0.3859992178333985, + "loss": 0.08492382615804672, + "loss_ce": 0.00245007430203259, + "loss_iou": 0.26171875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 382851504, + "step": 3948 + }, + { + "epoch": 0.3860969886585843, + "grad_norm": 29.386422711737147, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 382948656, + "step": 3949 + }, + { + "epoch": 0.3860969886585843, + "loss": 0.07702763378620148, + "loss_ce": 0.004334765952080488, + "loss_iou": 0.41015625, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 382948656, + "step": 3949 + }, + { + "epoch": 0.38619475948377, + "grad_norm": 16.304574332183428, + "learning_rate": 5e-05, + "loss": 0.0933, + "num_input_tokens_seen": 383045356, + "step": 3950 + }, + { + "epoch": 0.38619475948377, + "loss": 0.09177414327859879, + "loss_ce": 0.003578340169042349, + "loss_iou": 0.482421875, + "loss_num": 0.017578125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 383045356, + "step": 3950 + }, + { + "epoch": 0.38629253030895583, + "grad_norm": 9.00814191418779, + "learning_rate": 5e-05, + "loss": 0.1017, + "num_input_tokens_seen": 383142144, + "step": 3951 + }, + { + "epoch": 0.38629253030895583, + "loss": 0.13058683276176453, + "loss_ce": 0.009126865305006504, + "loss_iou": 0.26171875, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 383142144, + "step": 3951 + }, + { + "epoch": 0.3863903011341416, + "grad_norm": 9.382312807126583, + "learning_rate": 5e-05, + "loss": 0.1083, + "num_input_tokens_seen": 383239928, + "step": 3952 + }, + { + "epoch": 0.3863903011341416, + "loss": 0.1089378073811531, + "loss_ce": 0.008260324597358704, + "loss_iou": 0.404296875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 383239928, + "step": 3952 + }, + { + "epoch": 0.38648807195932733, + "grad_norm": 4.67283798046863, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 383336776, + "step": 3953 + }, + { + "epoch": 0.38648807195932733, + "loss": 0.08085838705301285, + "loss_ce": 0.006624375004321337, + "loss_iou": 0.314453125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 383336776, + "step": 3953 + }, + { + "epoch": 0.3865858427845131, + "grad_norm": 5.597124491233881, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 383432044, + "step": 3954 + }, + { + "epoch": 0.3865858427845131, + "loss": 0.0625096783041954, + "loss_ce": 0.00446524191647768, + "loss_iou": 0.11328125, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 383432044, + "step": 3954 + }, + { + "epoch": 0.3866836136096989, + "grad_norm": 2.9779989873885966, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 383529452, + "step": 3955 + }, + { + "epoch": 0.3866836136096989, + "loss": 0.06604234874248505, + "loss_ce": 0.0023826775141060352, + "loss_iou": 0.2578125, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 383529452, + "step": 3955 + }, + { + "epoch": 0.38678138443488463, + "grad_norm": 3.733581378269996, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 383626456, + "step": 3956 + }, + { + "epoch": 0.38678138443488463, + "loss": 0.06427868455648422, + "loss_ce": 0.005684934556484222, + "loss_iou": 0.37890625, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 383626456, + "step": 3956 + }, + { + "epoch": 0.3868791552600704, + "grad_norm": 12.942691857928263, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 383723460, + "step": 3957 + }, + { + "epoch": 0.3868791552600704, + "loss": 0.06085502356290817, + "loss_ce": 0.0028334781527519226, + "loss_iou": 0.3359375, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 383723460, + "step": 3957 + }, + { + "epoch": 0.38697692608525613, + "grad_norm": 7.549409606786349, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 383819540, + "step": 3958 + }, + { + "epoch": 0.38697692608525613, + "loss": 0.0786706730723381, + "loss_ce": 0.005184344481676817, + "loss_iou": 0.26953125, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 383819540, + "step": 3958 + }, + { + "epoch": 0.38707469691044194, + "grad_norm": 10.242159107666474, + "learning_rate": 5e-05, + "loss": 0.083, + "num_input_tokens_seen": 383916088, + "step": 3959 + }, + { + "epoch": 0.38707469691044194, + "loss": 0.07183459401130676, + "loss_ce": 0.009403261356055737, + "loss_iou": 0.23828125, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 383916088, + "step": 3959 + }, + { + "epoch": 0.3871724677356277, + "grad_norm": 11.170510092633602, + "learning_rate": 5e-05, + "loss": 0.0775, + "num_input_tokens_seen": 384013228, + "step": 3960 + }, + { + "epoch": 0.3871724677356277, + "loss": 0.08663105964660645, + "loss_ce": 0.00826955959200859, + "loss_iou": 0.25390625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 384013228, + "step": 3960 + }, + { + "epoch": 0.38727023856081344, + "grad_norm": 3.4894572923763723, + "learning_rate": 5e-05, + "loss": 0.1162, + "num_input_tokens_seen": 384110156, + "step": 3961 + }, + { + "epoch": 0.38727023856081344, + "loss": 0.12003226578235626, + "loss_ce": 0.007727579213678837, + "loss_iou": 0.3046875, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 384110156, + "step": 3961 + }, + { + "epoch": 0.3873680093859992, + "grad_norm": 4.599832315201861, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 384208000, + "step": 3962 + }, + { + "epoch": 0.3873680093859992, + "loss": 0.09902467578649521, + "loss_ce": 0.006785293109714985, + "loss_iou": 0.45703125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 384208000, + "step": 3962 + }, + { + "epoch": 0.387465780211185, + "grad_norm": 10.863119204164972, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 384305220, + "step": 3963 + }, + { + "epoch": 0.387465780211185, + "loss": 0.07959040999412537, + "loss_ce": 0.008484456688165665, + "loss_iou": 0.462890625, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 384305220, + "step": 3963 + }, + { + "epoch": 0.38756355103637075, + "grad_norm": 10.24185758110183, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 384401944, + "step": 3964 + }, + { + "epoch": 0.38756355103637075, + "loss": 0.05885803699493408, + "loss_ce": 0.004948734305799007, + "loss_iou": 0.30078125, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 384401944, + "step": 3964 + }, + { + "epoch": 0.3876613218615565, + "grad_norm": 10.913927142205338, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 384498968, + "step": 3965 + }, + { + "epoch": 0.3876613218615565, + "loss": 0.10341811180114746, + "loss_ce": 0.006921533495187759, + "loss_iou": 0.33984375, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 384498968, + "step": 3965 + }, + { + "epoch": 0.3877590926867423, + "grad_norm": 14.252723091320366, + "learning_rate": 5e-05, + "loss": 0.0812, + "num_input_tokens_seen": 384595260, + "step": 3966 + }, + { + "epoch": 0.3877590926867423, + "loss": 0.06495276838541031, + "loss_ce": 0.0036276953760534525, + "loss_iou": 0.279296875, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 384595260, + "step": 3966 + }, + { + "epoch": 0.38785686351192805, + "grad_norm": 49.70495292767148, + "learning_rate": 5e-05, + "loss": 0.1187, + "num_input_tokens_seen": 384691128, + "step": 3967 + }, + { + "epoch": 0.38785686351192805, + "loss": 0.11931842565536499, + "loss_ce": 0.004816471133381128, + "loss_iou": 0.279296875, + "loss_num": 0.02294921875, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 384691128, + "step": 3967 + }, + { + "epoch": 0.3879546343371138, + "grad_norm": 21.41573492635648, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 384788532, + "step": 3968 + }, + { + "epoch": 0.3879546343371138, + "loss": 0.08669348061084747, + "loss_ce": 0.006798465736210346, + "loss_iou": 0.322265625, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 384788532, + "step": 3968 + }, + { + "epoch": 0.38805240516229955, + "grad_norm": 7.715130504597928, + "learning_rate": 5e-05, + "loss": 0.0929, + "num_input_tokens_seen": 384885284, + "step": 3969 + }, + { + "epoch": 0.38805240516229955, + "loss": 0.06366857886314392, + "loss_ce": 0.005685183685272932, + "loss_iou": 0.353515625, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 384885284, + "step": 3969 + }, + { + "epoch": 0.38815017598748536, + "grad_norm": 11.670747205054859, + "learning_rate": 5e-05, + "loss": 0.0758, + "num_input_tokens_seen": 384981400, + "step": 3970 + }, + { + "epoch": 0.38815017598748536, + "loss": 0.06591115891933441, + "loss_ce": 0.010109765455126762, + "loss_iou": 0.265625, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 384981400, + "step": 3970 + }, + { + "epoch": 0.3882479468126711, + "grad_norm": 22.16709579283819, + "learning_rate": 5e-05, + "loss": 0.0982, + "num_input_tokens_seen": 385079620, + "step": 3971 + }, + { + "epoch": 0.3882479468126711, + "loss": 0.11334876716136932, + "loss_ce": 0.007498546503484249, + "loss_iou": 0.421875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 385079620, + "step": 3971 + }, + { + "epoch": 0.38834571763785686, + "grad_norm": 4.435266485272104, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 385175936, + "step": 3972 + }, + { + "epoch": 0.38834571763785686, + "loss": 0.09752866625785828, + "loss_ce": 0.003442968474701047, + "loss_iou": 0.375, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 385175936, + "step": 3972 + }, + { + "epoch": 0.3884434884630426, + "grad_norm": 7.769816380161768, + "learning_rate": 5e-05, + "loss": 0.0949, + "num_input_tokens_seen": 385273676, + "step": 3973 + }, + { + "epoch": 0.3884434884630426, + "loss": 0.08755312860012054, + "loss_ce": 0.005186186172068119, + "loss_iou": 0.40234375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 385273676, + "step": 3973 + }, + { + "epoch": 0.3885412592882284, + "grad_norm": 9.646038401499013, + "learning_rate": 5e-05, + "loss": 0.0976, + "num_input_tokens_seen": 385370868, + "step": 3974 + }, + { + "epoch": 0.3885412592882284, + "loss": 0.08582091331481934, + "loss_ce": 0.007543329615145922, + "loss_iou": 0.3671875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 385370868, + "step": 3974 + }, + { + "epoch": 0.38863903011341416, + "grad_norm": 5.869319452618011, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 385466908, + "step": 3975 + }, + { + "epoch": 0.38863903011341416, + "loss": 0.05439465492963791, + "loss_ce": 0.006413389462977648, + "loss_iou": 0.236328125, + "loss_num": 0.00958251953125, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 385466908, + "step": 3975 + }, + { + "epoch": 0.3887368009385999, + "grad_norm": 25.452838563786337, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 385563656, + "step": 3976 + }, + { + "epoch": 0.3887368009385999, + "loss": 0.09085668623447418, + "loss_ce": 0.0034238286316394806, + "loss_iou": 0.27734375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 385563656, + "step": 3976 + }, + { + "epoch": 0.38883457176378566, + "grad_norm": 2.8443251951257005, + "learning_rate": 5e-05, + "loss": 0.0557, + "num_input_tokens_seen": 385660244, + "step": 3977 + }, + { + "epoch": 0.38883457176378566, + "loss": 0.03785253316164017, + "loss_ce": 0.005053765140473843, + "loss_iou": 0.2431640625, + "loss_num": 0.006561279296875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 385660244, + "step": 3977 + }, + { + "epoch": 0.38893234258897147, + "grad_norm": 6.306442303482562, + "learning_rate": 5e-05, + "loss": 0.0912, + "num_input_tokens_seen": 385757708, + "step": 3978 + }, + { + "epoch": 0.38893234258897147, + "loss": 0.09622316062450409, + "loss_ce": 0.0020764372311532497, + "loss_iou": 0.3203125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 385757708, + "step": 3978 + }, + { + "epoch": 0.3890301134141572, + "grad_norm": 19.770025004524456, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 385855008, + "step": 3979 + }, + { + "epoch": 0.3890301134141572, + "loss": 0.07039397954940796, + "loss_ce": 0.010579519905149937, + "loss_iou": 0.2890625, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 385855008, + "step": 3979 + }, + { + "epoch": 0.38912788423934297, + "grad_norm": 7.28714733090576, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 385952848, + "step": 3980 + }, + { + "epoch": 0.38912788423934297, + "loss": 0.07785174250602722, + "loss_ce": 0.0028395368717610836, + "loss_iou": 0.4453125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 385952848, + "step": 3980 + }, + { + "epoch": 0.3892256550645287, + "grad_norm": 3.1281288694853298, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 386049936, + "step": 3981 + }, + { + "epoch": 0.3892256550645287, + "loss": 0.08412765711545944, + "loss_ce": 0.0046751415356993675, + "loss_iou": 0.298828125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 386049936, + "step": 3981 + }, + { + "epoch": 0.3893234258897145, + "grad_norm": 9.667415882447141, + "learning_rate": 5e-05, + "loss": 0.1157, + "num_input_tokens_seen": 386148252, + "step": 3982 + }, + { + "epoch": 0.3893234258897145, + "loss": 0.09983539581298828, + "loss_ce": 0.005063056014478207, + "loss_iou": 0.3515625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 386148252, + "step": 3982 + }, + { + "epoch": 0.3894211967149003, + "grad_norm": 6.213919907766673, + "learning_rate": 5e-05, + "loss": 0.0914, + "num_input_tokens_seen": 386244732, + "step": 3983 + }, + { + "epoch": 0.3894211967149003, + "loss": 0.10672635585069656, + "loss_ce": 0.006109897047281265, + "loss_iou": 0.34765625, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 386244732, + "step": 3983 + }, + { + "epoch": 0.389518967540086, + "grad_norm": 3.2893337542257655, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 386342112, + "step": 3984 + }, + { + "epoch": 0.389518967540086, + "loss": 0.09387661516666412, + "loss_ce": 0.007359276060014963, + "loss_iou": 0.38671875, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 386342112, + "step": 3984 + }, + { + "epoch": 0.3896167383652718, + "grad_norm": 4.9273997721226666, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 386439916, + "step": 3985 + }, + { + "epoch": 0.3896167383652718, + "loss": 0.08553643524646759, + "loss_ce": 0.006404350511729717, + "loss_iou": 0.357421875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 386439916, + "step": 3985 + }, + { + "epoch": 0.3897145091904576, + "grad_norm": 6.9203560275538, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 386536564, + "step": 3986 + }, + { + "epoch": 0.3897145091904576, + "loss": 0.06205892190337181, + "loss_ce": 0.007661337964236736, + "loss_iou": 0.3203125, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 386536564, + "step": 3986 + }, + { + "epoch": 0.38981228001564333, + "grad_norm": 4.532892008154236, + "learning_rate": 5e-05, + "loss": 0.0526, + "num_input_tokens_seen": 386633824, + "step": 3987 + }, + { + "epoch": 0.38981228001564333, + "loss": 0.06095878779888153, + "loss_ce": 0.005905075464397669, + "loss_iou": 0.392578125, + "loss_num": 0.010986328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 386633824, + "step": 3987 + }, + { + "epoch": 0.3899100508408291, + "grad_norm": 11.594371085568476, + "learning_rate": 5e-05, + "loss": 0.1185, + "num_input_tokens_seen": 386730380, + "step": 3988 + }, + { + "epoch": 0.3899100508408291, + "loss": 0.13719286024570465, + "loss_ce": 0.0036937138065695763, + "loss_iou": 0.447265625, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 386730380, + "step": 3988 + }, + { + "epoch": 0.3900078216660149, + "grad_norm": 6.393607662338801, + "learning_rate": 5e-05, + "loss": 0.1, + "num_input_tokens_seen": 386827172, + "step": 3989 + }, + { + "epoch": 0.3900078216660149, + "loss": 0.08184680342674255, + "loss_ce": 0.004072753246873617, + "loss_iou": 0.2265625, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 386827172, + "step": 3989 + }, + { + "epoch": 0.39010559249120064, + "grad_norm": 4.1204514638073455, + "learning_rate": 5e-05, + "loss": 0.0551, + "num_input_tokens_seen": 386923824, + "step": 3990 + }, + { + "epoch": 0.39010559249120064, + "loss": 0.04727926105260849, + "loss_ce": 0.007263085804879665, + "loss_iou": 0.220703125, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 386923824, + "step": 3990 + }, + { + "epoch": 0.3902033633163864, + "grad_norm": 7.308724784751602, + "learning_rate": 5e-05, + "loss": 0.088, + "num_input_tokens_seen": 387020436, + "step": 3991 + }, + { + "epoch": 0.3902033633163864, + "loss": 0.09175947308540344, + "loss_ce": 0.013453271239995956, + "loss_iou": 0.279296875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 387020436, + "step": 3991 + }, + { + "epoch": 0.39030113414157214, + "grad_norm": 7.019643290840896, + "learning_rate": 5e-05, + "loss": 0.0913, + "num_input_tokens_seen": 387117216, + "step": 3992 + }, + { + "epoch": 0.39030113414157214, + "loss": 0.11200864613056183, + "loss_ce": 0.007119724992662668, + "loss_iou": 0.287109375, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 387117216, + "step": 3992 + }, + { + "epoch": 0.39039890496675794, + "grad_norm": 5.560758157353199, + "learning_rate": 5e-05, + "loss": 0.0548, + "num_input_tokens_seen": 387213752, + "step": 3993 + }, + { + "epoch": 0.39039890496675794, + "loss": 0.0453302264213562, + "loss_ce": 0.006893334444612265, + "loss_iou": 0.296875, + "loss_num": 0.0076904296875, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 387213752, + "step": 3993 + }, + { + "epoch": 0.3904966757919437, + "grad_norm": 19.390606171848056, + "learning_rate": 5e-05, + "loss": 0.0577, + "num_input_tokens_seen": 387309880, + "step": 3994 + }, + { + "epoch": 0.3904966757919437, + "loss": 0.05426577478647232, + "loss_ce": 0.0032403862569481134, + "loss_iou": 0.19140625, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 387309880, + "step": 3994 + }, + { + "epoch": 0.39059444661712944, + "grad_norm": 58.18638036991911, + "learning_rate": 5e-05, + "loss": 0.0951, + "num_input_tokens_seen": 387407272, + "step": 3995 + }, + { + "epoch": 0.39059444661712944, + "loss": 0.08854939043521881, + "loss_ce": 0.006228226236999035, + "loss_iou": 0.3671875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 387407272, + "step": 3995 + }, + { + "epoch": 0.3906922174423152, + "grad_norm": 21.68385673053538, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 387504812, + "step": 3996 + }, + { + "epoch": 0.3906922174423152, + "loss": 0.07647403329610825, + "loss_ce": 0.004391512833535671, + "loss_iou": 0.5, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 387504812, + "step": 3996 + }, + { + "epoch": 0.390789988267501, + "grad_norm": 4.1129846865294155, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 387601524, + "step": 3997 + }, + { + "epoch": 0.390789988267501, + "loss": 0.11403262615203857, + "loss_ce": 0.007617827504873276, + "loss_iou": 0.333984375, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 387601524, + "step": 3997 + }, + { + "epoch": 0.39088775909268675, + "grad_norm": 4.432340755393514, + "learning_rate": 5e-05, + "loss": 0.1027, + "num_input_tokens_seen": 387698468, + "step": 3998 + }, + { + "epoch": 0.39088775909268675, + "loss": 0.08496813476085663, + "loss_ce": 0.006080201826989651, + "loss_iou": 0.279296875, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 387698468, + "step": 3998 + }, + { + "epoch": 0.3909855299178725, + "grad_norm": 11.393806187186904, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 387795252, + "step": 3999 + }, + { + "epoch": 0.3909855299178725, + "loss": 0.07091733813285828, + "loss_ce": 0.008760660886764526, + "loss_iou": 0.171875, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 387795252, + "step": 3999 + }, + { + "epoch": 0.39108330074305825, + "grad_norm": 14.008911138083672, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 387892524, + "step": 4000 + }, + { + "epoch": 0.39108330074305825, + "eval_seeclick_CIoU": 0.4847763627767563, + "eval_seeclick_GIoU": 0.49122002720832825, + "eval_seeclick_IoU": 0.5285226553678513, + "eval_seeclick_MAE_all": 0.07667010650038719, + "eval_seeclick_MAE_h": 0.04391826316714287, + "eval_seeclick_MAE_w": 0.10249019041657448, + "eval_seeclick_MAE_x": 0.11592534556984901, + "eval_seeclick_MAE_y": 0.04434661939740181, + "eval_seeclick_NUM_probability": 0.9999960958957672, + "eval_seeclick_inside_bbox": 0.7840909063816071, + "eval_seeclick_loss": 0.2840079069137573, + "eval_seeclick_loss_ce": 0.010118651203811169, + "eval_seeclick_loss_iou": 0.4713134765625, + "eval_seeclick_loss_num": 0.05573272705078125, + "eval_seeclick_loss_xval": 0.278533935546875, + "eval_seeclick_runtime": 73.6551, + "eval_seeclick_samples_per_second": 0.584, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 387892524, + "step": 4000 + }, + { + "epoch": 0.39108330074305825, + "eval_icons_CIoU": 0.7150937020778656, + "eval_icons_GIoU": 0.7167811095714569, + "eval_icons_IoU": 0.7357290983200073, + "eval_icons_MAE_all": 0.04569488950073719, + "eval_icons_MAE_h": 0.04630940966308117, + "eval_icons_MAE_w": 0.046795669943094254, + "eval_icons_MAE_x": 0.04678438976407051, + "eval_icons_MAE_y": 0.04289008677005768, + "eval_icons_NUM_probability": 0.9999754726886749, + "eval_icons_inside_bbox": 0.8506944477558136, + "eval_icons_loss": 0.14671283960342407, + "eval_icons_loss_ce": 8.134013569360832e-06, + "eval_icons_loss_iou": 0.3951416015625, + "eval_icons_loss_num": 0.031299591064453125, + "eval_icons_loss_xval": 0.15650177001953125, + "eval_icons_runtime": 90.1417, + "eval_icons_samples_per_second": 0.555, + "eval_icons_steps_per_second": 0.022, + "num_input_tokens_seen": 387892524, + "step": 4000 + }, + { + "epoch": 0.39108330074305825, + "eval_screenspot_CIoU": 0.3357835312684377, + "eval_screenspot_GIoU": 0.31678371628125507, + "eval_screenspot_IoU": 0.4200616081555684, + "eval_screenspot_MAE_all": 0.15297806759675345, + "eval_screenspot_MAE_h": 0.11095285415649414, + "eval_screenspot_MAE_w": 0.2048474500576655, + "eval_screenspot_MAE_x": 0.18637219071388245, + "eval_screenspot_MAE_y": 0.10973980277776718, + "eval_screenspot_NUM_probability": 0.9999491771062216, + "eval_screenspot_inside_bbox": 0.6820833285649618, + "eval_screenspot_loss": 0.5477026104927063, + "eval_screenspot_loss_ce": 0.027300554638107617, + "eval_screenspot_loss_iou": 0.3481852213541667, + "eval_screenspot_loss_num": 0.1041412353515625, + "eval_screenspot_loss_xval": 0.52081298828125, + "eval_screenspot_runtime": 148.981, + "eval_screenspot_samples_per_second": 0.597, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 387892524, + "step": 4000 + }, + { + "epoch": 0.39108330074305825, + "eval_compot_CIoU": 0.4620409905910492, + "eval_compot_GIoU": 0.4580812007188797, + "eval_compot_IoU": 0.529249519109726, + "eval_compot_MAE_all": 0.09942739456892014, + "eval_compot_MAE_h": 0.06629249826073647, + "eval_compot_MAE_w": 0.13410835713148117, + "eval_compot_MAE_x": 0.1319711096584797, + "eval_compot_MAE_y": 0.06533762440085411, + "eval_compot_NUM_probability": 0.9999833106994629, + "eval_compot_inside_bbox": 0.7048611044883728, + "eval_compot_loss": 0.3416641652584076, + "eval_compot_loss_ce": 0.03506382927298546, + "eval_compot_loss_iou": 0.49169921875, + "eval_compot_loss_num": 0.05408477783203125, + "eval_compot_loss_xval": 0.2705078125, + "eval_compot_runtime": 97.9824, + "eval_compot_samples_per_second": 0.51, + "eval_compot_steps_per_second": 0.02, + "num_input_tokens_seen": 387892524, + "step": 4000 + }, + { + "epoch": 0.39108330074305825, + "loss": 0.2858516573905945, + "loss_ce": 0.03151813894510269, + "loss_iou": 0.50390625, + "loss_num": 0.05078125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 387892524, + "step": 4000 + }, + { + "epoch": 0.39118107156824405, + "grad_norm": 58.36061591814445, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 387990308, + "step": 4001 + }, + { + "epoch": 0.39118107156824405, + "loss": 0.0750134289264679, + "loss_ce": 0.003327632322907448, + "loss_iou": 0.41796875, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 387990308, + "step": 4001 + }, + { + "epoch": 0.3912788423934298, + "grad_norm": 11.403181412151056, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 388088208, + "step": 4002 + }, + { + "epoch": 0.3912788423934298, + "loss": 0.05559778958559036, + "loss_ce": 0.008844861760735512, + "loss_iou": 0.318359375, + "loss_num": 0.00933837890625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 388088208, + "step": 4002 + }, + { + "epoch": 0.39137661321861555, + "grad_norm": 15.509689498867887, + "learning_rate": 5e-05, + "loss": 0.0918, + "num_input_tokens_seen": 388185248, + "step": 4003 + }, + { + "epoch": 0.39137661321861555, + "loss": 0.09612740576267242, + "loss_ce": 0.0035675866529345512, + "loss_iou": 0.296875, + "loss_num": 0.0184326171875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 388185248, + "step": 4003 + }, + { + "epoch": 0.3914743840438013, + "grad_norm": 10.742954202622006, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 388282176, + "step": 4004 + }, + { + "epoch": 0.3914743840438013, + "loss": 0.046325333416461945, + "loss_ce": 0.004523878917098045, + "loss_iou": 0.25, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 388282176, + "step": 4004 + }, + { + "epoch": 0.3915721548689871, + "grad_norm": 8.575240864916339, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 388380432, + "step": 4005 + }, + { + "epoch": 0.3915721548689871, + "loss": 0.09200252592563629, + "loss_ce": 0.0036083583254367113, + "loss_iou": 0.2734375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 388380432, + "step": 4005 + }, + { + "epoch": 0.39166992569417286, + "grad_norm": 18.212196894026242, + "learning_rate": 5e-05, + "loss": 0.1021, + "num_input_tokens_seen": 388477312, + "step": 4006 + }, + { + "epoch": 0.39166992569417286, + "loss": 0.12281747907400131, + "loss_ce": 0.010817969217896461, + "loss_iou": 0.31640625, + "loss_num": 0.0224609375, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 388477312, + "step": 4006 + }, + { + "epoch": 0.3917676965193586, + "grad_norm": 7.972030557366594, + "learning_rate": 5e-05, + "loss": 0.0963, + "num_input_tokens_seen": 388574088, + "step": 4007 + }, + { + "epoch": 0.3917676965193586, + "loss": 0.10075901448726654, + "loss_ce": 0.0050711482763290405, + "loss_iou": 0.34375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 388574088, + "step": 4007 + }, + { + "epoch": 0.39186546734454436, + "grad_norm": 6.723144600419798, + "learning_rate": 5e-05, + "loss": 0.0856, + "num_input_tokens_seen": 388671288, + "step": 4008 + }, + { + "epoch": 0.39186546734454436, + "loss": 0.08882199972867966, + "loss_ce": 0.006096475757658482, + "loss_iou": 0.33203125, + "loss_num": 0.0166015625, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 388671288, + "step": 4008 + }, + { + "epoch": 0.39196323816973017, + "grad_norm": 5.192250376900665, + "learning_rate": 5e-05, + "loss": 0.0988, + "num_input_tokens_seen": 388768048, + "step": 4009 + }, + { + "epoch": 0.39196323816973017, + "loss": 0.1109243705868721, + "loss_ce": 0.004643089137971401, + "loss_iou": 0.306640625, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 388768048, + "step": 4009 + }, + { + "epoch": 0.3920610089949159, + "grad_norm": 8.867806011095928, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 388865016, + "step": 4010 + }, + { + "epoch": 0.3920610089949159, + "loss": 0.05319347232580185, + "loss_ce": 0.002404595725238323, + "loss_iou": 0.255859375, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 388865016, + "step": 4010 + }, + { + "epoch": 0.39215877982010167, + "grad_norm": 6.1680563612118995, + "learning_rate": 5e-05, + "loss": 0.0996, + "num_input_tokens_seen": 388963028, + "step": 4011 + }, + { + "epoch": 0.39215877982010167, + "loss": 0.11585064232349396, + "loss_ce": 0.003515430726110935, + "loss_iou": 0.357421875, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 388963028, + "step": 4011 + }, + { + "epoch": 0.39225655064528747, + "grad_norm": 9.88511630093407, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 389060060, + "step": 4012 + }, + { + "epoch": 0.39225655064528747, + "loss": 0.10019104182720184, + "loss_ce": 0.008760375902056694, + "loss_iou": 0.38671875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 389060060, + "step": 4012 + }, + { + "epoch": 0.3923543214704732, + "grad_norm": 7.0336393484260435, + "learning_rate": 5e-05, + "loss": 0.0998, + "num_input_tokens_seen": 389155736, + "step": 4013 + }, + { + "epoch": 0.3923543214704732, + "loss": 0.0632275938987732, + "loss_ce": 0.002253466285765171, + "loss_iou": 0.3046875, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 389155736, + "step": 4013 + }, + { + "epoch": 0.39245209229565897, + "grad_norm": 12.43027437353417, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 389251652, + "step": 4014 + }, + { + "epoch": 0.39245209229565897, + "loss": 0.07973755151033401, + "loss_ce": 0.005122074391692877, + "loss_iou": 0.373046875, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 389251652, + "step": 4014 + }, + { + "epoch": 0.3925498631208447, + "grad_norm": 4.878647402626253, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 389348640, + "step": 4015 + }, + { + "epoch": 0.3925498631208447, + "loss": 0.05874767154455185, + "loss_ce": 0.0039533581584692, + "loss_iou": 0.294921875, + "loss_num": 0.010986328125, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 389348640, + "step": 4015 + }, + { + "epoch": 0.3926476339460305, + "grad_norm": 10.46302292038561, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 389445508, + "step": 4016 + }, + { + "epoch": 0.3926476339460305, + "loss": 0.08458814024925232, + "loss_ce": 0.00264844112098217, + "loss_iou": 0.42578125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 389445508, + "step": 4016 + }, + { + "epoch": 0.3927454047712163, + "grad_norm": 8.266761601105577, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 389542828, + "step": 4017 + }, + { + "epoch": 0.3927454047712163, + "loss": 0.08260652422904968, + "loss_ce": 0.006571978330612183, + "loss_iou": 0.357421875, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 389542828, + "step": 4017 + }, + { + "epoch": 0.39284317559640203, + "grad_norm": 5.816436885858154, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 389639256, + "step": 4018 + }, + { + "epoch": 0.39284317559640203, + "loss": 0.08992458134889603, + "loss_ce": 0.007069356273859739, + "loss_iou": 0.3359375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 389639256, + "step": 4018 + }, + { + "epoch": 0.3929409464215878, + "grad_norm": 4.85065050740319, + "learning_rate": 5e-05, + "loss": 0.0873, + "num_input_tokens_seen": 389736824, + "step": 4019 + }, + { + "epoch": 0.3929409464215878, + "loss": 0.08427217602729797, + "loss_ce": 0.004194049630314112, + "loss_iou": 0.302734375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 389736824, + "step": 4019 + }, + { + "epoch": 0.3930387172467736, + "grad_norm": 13.846501306166092, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 389834044, + "step": 4020 + }, + { + "epoch": 0.3930387172467736, + "loss": 0.0578588992357254, + "loss_ce": 0.0031866594217717648, + "loss_iou": 0.380859375, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 389834044, + "step": 4020 + }, + { + "epoch": 0.39313648807195933, + "grad_norm": 5.426549742325604, + "learning_rate": 5e-05, + "loss": 0.1211, + "num_input_tokens_seen": 389931528, + "step": 4021 + }, + { + "epoch": 0.39313648807195933, + "loss": 0.14835724234580994, + "loss_ce": 0.0058401478454470634, + "loss_iou": 0.380859375, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 389931528, + "step": 4021 + }, + { + "epoch": 0.3932342588971451, + "grad_norm": 15.816647817021899, + "learning_rate": 5e-05, + "loss": 0.1283, + "num_input_tokens_seen": 390030116, + "step": 4022 + }, + { + "epoch": 0.3932342588971451, + "loss": 0.17415136098861694, + "loss_ce": 0.005633304361253977, + "loss_iou": 0.328125, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 390030116, + "step": 4022 + }, + { + "epoch": 0.39333202972233083, + "grad_norm": 26.311086819338342, + "learning_rate": 5e-05, + "loss": 0.0906, + "num_input_tokens_seen": 390127376, + "step": 4023 + }, + { + "epoch": 0.39333202972233083, + "loss": 0.11577022075653076, + "loss_ce": 0.007432825397700071, + "loss_iou": 0.328125, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 390127376, + "step": 4023 + }, + { + "epoch": 0.39342980054751664, + "grad_norm": 9.992837034626616, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 390224604, + "step": 4024 + }, + { + "epoch": 0.39342980054751664, + "loss": 0.05485842004418373, + "loss_ce": 0.006503317039459944, + "loss_iou": 0.34375, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 390224604, + "step": 4024 + }, + { + "epoch": 0.3935275713727024, + "grad_norm": 5.473254185894112, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 390322288, + "step": 4025 + }, + { + "epoch": 0.3935275713727024, + "loss": 0.10454069077968597, + "loss_ce": 0.0033749151043593884, + "loss_iou": 0.30859375, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 390322288, + "step": 4025 + }, + { + "epoch": 0.39362534219788814, + "grad_norm": 6.656864101343246, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 390418456, + "step": 4026 + }, + { + "epoch": 0.39362534219788814, + "loss": 0.09349476546049118, + "loss_ce": 0.004612318240106106, + "loss_iou": 0.287109375, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 390418456, + "step": 4026 + }, + { + "epoch": 0.3937231130230739, + "grad_norm": 8.758871404666035, + "learning_rate": 5e-05, + "loss": 0.1084, + "num_input_tokens_seen": 390514824, + "step": 4027 + }, + { + "epoch": 0.3937231130230739, + "loss": 0.08826082944869995, + "loss_ce": 0.005283527076244354, + "loss_iou": 0.333984375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 390514824, + "step": 4027 + }, + { + "epoch": 0.3938208838482597, + "grad_norm": 5.74569323603754, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 390612160, + "step": 4028 + }, + { + "epoch": 0.3938208838482597, + "loss": 0.072686105966568, + "loss_ce": 0.009056954644620419, + "loss_iou": 0.369140625, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 390612160, + "step": 4028 + }, + { + "epoch": 0.39391865467344545, + "grad_norm": 5.400955174228044, + "learning_rate": 5e-05, + "loss": 0.0942, + "num_input_tokens_seen": 390709064, + "step": 4029 + }, + { + "epoch": 0.39391865467344545, + "loss": 0.07460750639438629, + "loss_ce": 0.004432338755577803, + "loss_iou": 0.3515625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 390709064, + "step": 4029 + }, + { + "epoch": 0.3940164254986312, + "grad_norm": 3.032995507064773, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 390805904, + "step": 4030 + }, + { + "epoch": 0.3940164254986312, + "loss": 0.07651884108781815, + "loss_ce": 0.006351298652589321, + "loss_iou": 0.396484375, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 390805904, + "step": 4030 + }, + { + "epoch": 0.39411419632381695, + "grad_norm": 5.8159296333857124, + "learning_rate": 5e-05, + "loss": 0.0961, + "num_input_tokens_seen": 390903276, + "step": 4031 + }, + { + "epoch": 0.39411419632381695, + "loss": 0.13229867815971375, + "loss_ce": 0.006139007396996021, + "loss_iou": 0.248046875, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 390903276, + "step": 4031 + }, + { + "epoch": 0.39421196714900275, + "grad_norm": 4.732716995181567, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 391000508, + "step": 4032 + }, + { + "epoch": 0.39421196714900275, + "loss": 0.06228683888912201, + "loss_ce": 0.0012326108990237117, + "loss_iou": 0.26171875, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 391000508, + "step": 4032 + }, + { + "epoch": 0.3943097379741885, + "grad_norm": 5.999234360802021, + "learning_rate": 5e-05, + "loss": 0.1041, + "num_input_tokens_seen": 391097784, + "step": 4033 + }, + { + "epoch": 0.3943097379741885, + "loss": 0.11960788816213608, + "loss_ce": 0.00443454971536994, + "loss_iou": 0.306640625, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 391097784, + "step": 4033 + }, + { + "epoch": 0.39440750879937425, + "grad_norm": 4.371738928301654, + "learning_rate": 5e-05, + "loss": 0.0992, + "num_input_tokens_seen": 391195908, + "step": 4034 + }, + { + "epoch": 0.39440750879937425, + "loss": 0.10634234547615051, + "loss_ce": 0.0037880223244428635, + "loss_iou": 0.365234375, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 391195908, + "step": 4034 + }, + { + "epoch": 0.39450527962456006, + "grad_norm": 8.381185554723604, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 391292364, + "step": 4035 + }, + { + "epoch": 0.39450527962456006, + "loss": 0.10175222903490067, + "loss_ce": 0.006140654440969229, + "loss_iou": 0.24609375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 391292364, + "step": 4035 + }, + { + "epoch": 0.3946030504497458, + "grad_norm": 3.3874971785771, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 391389620, + "step": 4036 + }, + { + "epoch": 0.3946030504497458, + "loss": 0.047408998012542725, + "loss_ce": 0.005927980877459049, + "loss_iou": 0.275390625, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 391389620, + "step": 4036 + }, + { + "epoch": 0.39470082127493156, + "grad_norm": 11.871644530740094, + "learning_rate": 5e-05, + "loss": 0.0973, + "num_input_tokens_seen": 391485968, + "step": 4037 + }, + { + "epoch": 0.39470082127493156, + "loss": 0.08389797061681747, + "loss_ce": 0.005803488194942474, + "loss_iou": 0.32421875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 391485968, + "step": 4037 + }, + { + "epoch": 0.3947985921001173, + "grad_norm": 33.61043162188107, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 391582084, + "step": 4038 + }, + { + "epoch": 0.3947985921001173, + "loss": 0.11770900338888168, + "loss_ce": 0.007769428193569183, + "loss_iou": 0.244140625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 391582084, + "step": 4038 + }, + { + "epoch": 0.3948963629253031, + "grad_norm": 17.96801159589326, + "learning_rate": 5e-05, + "loss": 0.1052, + "num_input_tokens_seen": 391678764, + "step": 4039 + }, + { + "epoch": 0.3948963629253031, + "loss": 0.054072096943855286, + "loss_ce": 0.005221085622906685, + "loss_iou": 0.333984375, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 391678764, + "step": 4039 + }, + { + "epoch": 0.39499413375048886, + "grad_norm": 13.51283592006408, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 391776120, + "step": 4040 + }, + { + "epoch": 0.39499413375048886, + "loss": 0.08430078625679016, + "loss_ce": 0.004649904556572437, + "loss_iou": 0.32421875, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 391776120, + "step": 4040 + }, + { + "epoch": 0.3950919045756746, + "grad_norm": 5.216761504467539, + "learning_rate": 5e-05, + "loss": 0.0866, + "num_input_tokens_seen": 391874000, + "step": 4041 + }, + { + "epoch": 0.3950919045756746, + "loss": 0.09600630402565002, + "loss_ce": 0.0038889897987246513, + "loss_iou": 0.365234375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 391874000, + "step": 4041 + }, + { + "epoch": 0.39518967540086036, + "grad_norm": 15.303238958529878, + "learning_rate": 5e-05, + "loss": 0.092, + "num_input_tokens_seen": 391971892, + "step": 4042 + }, + { + "epoch": 0.39518967540086036, + "loss": 0.12162590026855469, + "loss_ce": 0.012037277221679688, + "loss_iou": 0.447265625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 391971892, + "step": 4042 + }, + { + "epoch": 0.39528744622604617, + "grad_norm": 4.824752024131861, + "learning_rate": 5e-05, + "loss": 0.114, + "num_input_tokens_seen": 392068112, + "step": 4043 + }, + { + "epoch": 0.39528744622604617, + "loss": 0.10779552906751633, + "loss_ce": 0.00920848734676838, + "loss_iou": 0.265625, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 392068112, + "step": 4043 + }, + { + "epoch": 0.3953852170512319, + "grad_norm": 6.434610642240203, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 392165100, + "step": 4044 + }, + { + "epoch": 0.3953852170512319, + "loss": 0.08717496693134308, + "loss_ce": 0.0030551680829375982, + "loss_iou": 0.189453125, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 392165100, + "step": 4044 + }, + { + "epoch": 0.39548298787641767, + "grad_norm": 1.218047620352486, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 392261664, + "step": 4045 + }, + { + "epoch": 0.39548298787641767, + "loss": 0.08111453801393509, + "loss_ce": 0.0049579208716750145, + "loss_iou": 0.2490234375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 392261664, + "step": 4045 + }, + { + "epoch": 0.3955807587016034, + "grad_norm": 13.940027516491847, + "learning_rate": 5e-05, + "loss": 0.1179, + "num_input_tokens_seen": 392358940, + "step": 4046 + }, + { + "epoch": 0.3955807587016034, + "loss": 0.13937216997146606, + "loss_ce": 0.009474100545048714, + "loss_iou": 0.2177734375, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 392358940, + "step": 4046 + }, + { + "epoch": 0.3956785295267892, + "grad_norm": 17.312699749835204, + "learning_rate": 5e-05, + "loss": 0.1048, + "num_input_tokens_seen": 392456884, + "step": 4047 + }, + { + "epoch": 0.3956785295267892, + "loss": 0.10622909665107727, + "loss_ce": 0.00370528781786561, + "loss_iou": 0.369140625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 392456884, + "step": 4047 + }, + { + "epoch": 0.395776300351975, + "grad_norm": 8.349554298592777, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 392553720, + "step": 4048 + }, + { + "epoch": 0.395776300351975, + "loss": 0.052760735154151917, + "loss_ce": 0.006267201621085405, + "loss_iou": 0.298828125, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 392553720, + "step": 4048 + }, + { + "epoch": 0.3958740711771607, + "grad_norm": 12.32414314956989, + "learning_rate": 5e-05, + "loss": 0.1226, + "num_input_tokens_seen": 392650196, + "step": 4049 + }, + { + "epoch": 0.3958740711771607, + "loss": 0.11728179454803467, + "loss_ce": 0.00552641786634922, + "loss_iou": 0.3671875, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 392650196, + "step": 4049 + }, + { + "epoch": 0.3959718420023465, + "grad_norm": 11.088736755140662, + "learning_rate": 5e-05, + "loss": 0.1147, + "num_input_tokens_seen": 392746564, + "step": 4050 + }, + { + "epoch": 0.3959718420023465, + "loss": 0.10676416754722595, + "loss_ce": 0.006407110020518303, + "loss_iou": 0.353515625, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 392746564, + "step": 4050 + }, + { + "epoch": 0.3960696128275323, + "grad_norm": 6.076386205290934, + "learning_rate": 5e-05, + "loss": 0.1087, + "num_input_tokens_seen": 392843908, + "step": 4051 + }, + { + "epoch": 0.3960696128275323, + "loss": 0.09530707448720932, + "loss_ce": 0.012604438699781895, + "loss_iou": 0.296875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 392843908, + "step": 4051 + }, + { + "epoch": 0.39616738365271803, + "grad_norm": 5.422997007055875, + "learning_rate": 5e-05, + "loss": 0.112, + "num_input_tokens_seen": 392940968, + "step": 4052 + }, + { + "epoch": 0.39616738365271803, + "loss": 0.1015843003988266, + "loss_ce": 0.007788526359945536, + "loss_iou": 0.33203125, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 392940968, + "step": 4052 + }, + { + "epoch": 0.3962651544779038, + "grad_norm": 4.999974813765314, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 393037308, + "step": 4053 + }, + { + "epoch": 0.3962651544779038, + "loss": 0.06842857599258423, + "loss_ce": 0.006294785067439079, + "loss_iou": 0.283203125, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 393037308, + "step": 4053 + }, + { + "epoch": 0.39636292530308953, + "grad_norm": 7.203807212506876, + "learning_rate": 5e-05, + "loss": 0.0916, + "num_input_tokens_seen": 393134264, + "step": 4054 + }, + { + "epoch": 0.39636292530308953, + "loss": 0.12682628631591797, + "loss_ce": 0.01374341081827879, + "loss_iou": 0.265625, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 393134264, + "step": 4054 + }, + { + "epoch": 0.39646069612827534, + "grad_norm": 6.137100860472921, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 393231404, + "step": 4055 + }, + { + "epoch": 0.39646069612827534, + "loss": 0.0933392345905304, + "loss_ce": 0.006638790480792522, + "loss_iou": 0.37890625, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 393231404, + "step": 4055 + }, + { + "epoch": 0.3965584669534611, + "grad_norm": 9.69466081300282, + "learning_rate": 5e-05, + "loss": 0.088, + "num_input_tokens_seen": 393328168, + "step": 4056 + }, + { + "epoch": 0.3965584669534611, + "loss": 0.10803758352994919, + "loss_ce": 0.007939928211271763, + "loss_iou": 0.33203125, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 393328168, + "step": 4056 + }, + { + "epoch": 0.39665623777864684, + "grad_norm": 15.469046171148232, + "learning_rate": 5e-05, + "loss": 0.0875, + "num_input_tokens_seen": 393424912, + "step": 4057 + }, + { + "epoch": 0.39665623777864684, + "loss": 0.10203567147254944, + "loss_ce": 0.005554347764700651, + "loss_iou": 0.29296875, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 393424912, + "step": 4057 + }, + { + "epoch": 0.39675400860383264, + "grad_norm": 11.65368132415659, + "learning_rate": 5e-05, + "loss": 0.1005, + "num_input_tokens_seen": 393522364, + "step": 4058 + }, + { + "epoch": 0.39675400860383264, + "loss": 0.0796813890337944, + "loss_ce": 0.005050651729106903, + "loss_iou": 0.33203125, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 393522364, + "step": 4058 + }, + { + "epoch": 0.3968517794290184, + "grad_norm": 9.174879480967462, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 393619152, + "step": 4059 + }, + { + "epoch": 0.3968517794290184, + "loss": 0.08031746000051498, + "loss_ce": 0.004664380103349686, + "loss_iou": 0.25390625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 393619152, + "step": 4059 + }, + { + "epoch": 0.39694955025420414, + "grad_norm": 3.1580014000634917, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 393715300, + "step": 4060 + }, + { + "epoch": 0.39694955025420414, + "loss": 0.07887423038482666, + "loss_ce": 0.0056015243753790855, + "loss_iou": 0.275390625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 393715300, + "step": 4060 + }, + { + "epoch": 0.3970473210793899, + "grad_norm": 8.271132038825085, + "learning_rate": 5e-05, + "loss": 0.0921, + "num_input_tokens_seen": 393813604, + "step": 4061 + }, + { + "epoch": 0.3970473210793899, + "loss": 0.08011440932750702, + "loss_ce": 0.007680938113480806, + "loss_iou": 0.37890625, + "loss_num": 0.0145263671875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 393813604, + "step": 4061 + }, + { + "epoch": 0.3971450919045757, + "grad_norm": 8.610124600207778, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 393910592, + "step": 4062 + }, + { + "epoch": 0.3971450919045757, + "loss": 0.07738836109638214, + "loss_ce": 0.003871514927595854, + "loss_iou": 0.326171875, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 393910592, + "step": 4062 + }, + { + "epoch": 0.39724286272976145, + "grad_norm": 4.163493302692102, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 394006328, + "step": 4063 + }, + { + "epoch": 0.39724286272976145, + "loss": 0.04035899043083191, + "loss_ce": 0.0042872121557593346, + "loss_iou": 0.21875, + "loss_num": 0.0072021484375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 394006328, + "step": 4063 + }, + { + "epoch": 0.3973406335549472, + "grad_norm": 20.878558497425146, + "learning_rate": 5e-05, + "loss": 0.1227, + "num_input_tokens_seen": 394102996, + "step": 4064 + }, + { + "epoch": 0.3973406335549472, + "loss": 0.13654470443725586, + "loss_ce": 0.009667874313890934, + "loss_iou": 0.28125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 394102996, + "step": 4064 + }, + { + "epoch": 0.39743840438013295, + "grad_norm": 16.948190285415983, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 394199368, + "step": 4065 + }, + { + "epoch": 0.39743840438013295, + "loss": 0.10544642806053162, + "loss_ce": 0.008663746528327465, + "loss_iou": 0.28125, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 394199368, + "step": 4065 + }, + { + "epoch": 0.39753617520531875, + "grad_norm": 7.44031791157073, + "learning_rate": 5e-05, + "loss": 0.0541, + "num_input_tokens_seen": 394295944, + "step": 4066 + }, + { + "epoch": 0.39753617520531875, + "loss": 0.060338348150253296, + "loss_ce": 0.010548917576670647, + "loss_iou": 0.333984375, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 394295944, + "step": 4066 + }, + { + "epoch": 0.3976339460305045, + "grad_norm": 14.052242289918697, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 394392972, + "step": 4067 + }, + { + "epoch": 0.3976339460305045, + "loss": 0.06840884685516357, + "loss_ce": 0.003955721855163574, + "loss_iou": 0.37109375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 394392972, + "step": 4067 + }, + { + "epoch": 0.39773171685569025, + "grad_norm": 3.31815344061549, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 394490632, + "step": 4068 + }, + { + "epoch": 0.39773171685569025, + "loss": 0.07655532658100128, + "loss_ce": 0.003725126152858138, + "loss_iou": 0.3046875, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 394490632, + "step": 4068 + }, + { + "epoch": 0.397829487680876, + "grad_norm": 12.609807219101297, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 394587628, + "step": 4069 + }, + { + "epoch": 0.397829487680876, + "loss": 0.06052171811461449, + "loss_ce": 0.007238029036670923, + "loss_iou": 0.34375, + "loss_num": 0.01068115234375, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 394587628, + "step": 4069 + }, + { + "epoch": 0.3979272585060618, + "grad_norm": 21.93739478241564, + "learning_rate": 5e-05, + "loss": 0.0594, + "num_input_tokens_seen": 394684392, + "step": 4070 + }, + { + "epoch": 0.3979272585060618, + "loss": 0.06135816499590874, + "loss_ce": 0.0038172733038663864, + "loss_iou": 0.357421875, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 394684392, + "step": 4070 + }, + { + "epoch": 0.39802502933124756, + "grad_norm": 5.994565663230443, + "learning_rate": 5e-05, + "loss": 0.1109, + "num_input_tokens_seen": 394780952, + "step": 4071 + }, + { + "epoch": 0.39802502933124756, + "loss": 0.15087777376174927, + "loss_ce": 0.004088236950337887, + "loss_iou": 0.326171875, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 394780952, + "step": 4071 + }, + { + "epoch": 0.3981228001564333, + "grad_norm": 4.582292937569408, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 394877680, + "step": 4072 + }, + { + "epoch": 0.3981228001564333, + "loss": 0.0386381596326828, + "loss_ce": 0.004008338786661625, + "loss_iou": 0.26171875, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 394877680, + "step": 4072 + }, + { + "epoch": 0.39822057098161906, + "grad_norm": 6.5846214358487565, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 394974292, + "step": 4073 + }, + { + "epoch": 0.39822057098161906, + "loss": 0.058891139924526215, + "loss_ce": 0.006446679588407278, + "loss_iou": 0.421875, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 394974292, + "step": 4073 + }, + { + "epoch": 0.39831834180680487, + "grad_norm": 5.759585468020235, + "learning_rate": 5e-05, + "loss": 0.0573, + "num_input_tokens_seen": 395071780, + "step": 4074 + }, + { + "epoch": 0.39831834180680487, + "loss": 0.0635596290230751, + "loss_ce": 0.0037299171090126038, + "loss_iou": 0.337890625, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 395071780, + "step": 4074 + }, + { + "epoch": 0.3984161126319906, + "grad_norm": 7.712625735280035, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 395168276, + "step": 4075 + }, + { + "epoch": 0.3984161126319906, + "loss": 0.07026959955692291, + "loss_ce": 0.0035734320990741253, + "loss_iou": 0.353515625, + "loss_num": 0.0133056640625, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 395168276, + "step": 4075 + }, + { + "epoch": 0.39851388345717637, + "grad_norm": 17.38267562624313, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 395265636, + "step": 4076 + }, + { + "epoch": 0.39851388345717637, + "loss": 0.0792078822851181, + "loss_ce": 0.008223997429013252, + "loss_iou": 0.28515625, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 395265636, + "step": 4076 + }, + { + "epoch": 0.3986116542823621, + "grad_norm": 4.857605325670389, + "learning_rate": 5e-05, + "loss": 0.0972, + "num_input_tokens_seen": 395362032, + "step": 4077 + }, + { + "epoch": 0.3986116542823621, + "loss": 0.10440173745155334, + "loss_ce": 0.010987435467541218, + "loss_iou": 0.279296875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 395362032, + "step": 4077 + }, + { + "epoch": 0.3987094251075479, + "grad_norm": 6.895313517698879, + "learning_rate": 5e-05, + "loss": 0.1122, + "num_input_tokens_seen": 395458460, + "step": 4078 + }, + { + "epoch": 0.3987094251075479, + "loss": 0.028596527874469757, + "loss_ce": 0.002053862437605858, + "loss_iou": 0.23828125, + "loss_num": 0.00531005859375, + "loss_xval": 0.0264892578125, + "num_input_tokens_seen": 395458460, + "step": 4078 + }, + { + "epoch": 0.39880719593273367, + "grad_norm": 8.59876705247871, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 395554376, + "step": 4079 + }, + { + "epoch": 0.39880719593273367, + "loss": 0.07210348546504974, + "loss_ce": 0.006978979799896479, + "loss_iou": 0.10546875, + "loss_num": 0.0130615234375, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 395554376, + "step": 4079 + }, + { + "epoch": 0.3989049667579194, + "grad_norm": 5.977430649196523, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 395651224, + "step": 4080 + }, + { + "epoch": 0.3989049667579194, + "loss": 0.05794040113687515, + "loss_ce": 0.002962986472994089, + "loss_iou": 0.326171875, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 395651224, + "step": 4080 + }, + { + "epoch": 0.3990027375831052, + "grad_norm": 6.334950780585158, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 395748780, + "step": 4081 + }, + { + "epoch": 0.3990027375831052, + "loss": 0.06514209508895874, + "loss_ce": 0.005114017054438591, + "loss_iou": 0.302734375, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 395748780, + "step": 4081 + }, + { + "epoch": 0.399100508408291, + "grad_norm": 4.727050895667267, + "learning_rate": 5e-05, + "loss": 0.1135, + "num_input_tokens_seen": 395845320, + "step": 4082 + }, + { + "epoch": 0.399100508408291, + "loss": 0.14377711713314056, + "loss_ce": 0.008004413917660713, + "loss_iou": 0.146484375, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 395845320, + "step": 4082 + }, + { + "epoch": 0.3991982792334767, + "grad_norm": 6.744182758626552, + "learning_rate": 5e-05, + "loss": 0.0508, + "num_input_tokens_seen": 395942100, + "step": 4083 + }, + { + "epoch": 0.3991982792334767, + "loss": 0.056985631585121155, + "loss_ce": 0.004289400763809681, + "loss_iou": 0.283203125, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 395942100, + "step": 4083 + }, + { + "epoch": 0.3992960500586625, + "grad_norm": 14.8007975410444, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 396039136, + "step": 4084 + }, + { + "epoch": 0.3992960500586625, + "loss": 0.07620668411254883, + "loss_ce": 0.0035290708765387535, + "loss_iou": 0.2890625, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 396039136, + "step": 4084 + }, + { + "epoch": 0.3993938208838483, + "grad_norm": 25.329603595246116, + "learning_rate": 5e-05, + "loss": 0.1066, + "num_input_tokens_seen": 396136952, + "step": 4085 + }, + { + "epoch": 0.3993938208838483, + "loss": 0.09110793471336365, + "loss_ce": 0.006177517585456371, + "loss_iou": 0.384765625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 396136952, + "step": 4085 + }, + { + "epoch": 0.39949159170903403, + "grad_norm": 18.55948242832338, + "learning_rate": 5e-05, + "loss": 0.103, + "num_input_tokens_seen": 396234300, + "step": 4086 + }, + { + "epoch": 0.39949159170903403, + "loss": 0.11208733916282654, + "loss_ce": 0.0057946136221289635, + "loss_iou": 0.36328125, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 396234300, + "step": 4086 + }, + { + "epoch": 0.3995893625342198, + "grad_norm": 4.383591942923516, + "learning_rate": 5e-05, + "loss": 0.0986, + "num_input_tokens_seen": 396330424, + "step": 4087 + }, + { + "epoch": 0.3995893625342198, + "loss": 0.09403358399868011, + "loss_ce": 0.004556044936180115, + "loss_iou": 0.2109375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 396330424, + "step": 4087 + }, + { + "epoch": 0.39968713335940553, + "grad_norm": 13.780535150333911, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 396426492, + "step": 4088 + }, + { + "epoch": 0.39968713335940553, + "loss": 0.0642896220088005, + "loss_ce": 0.008640820160508156, + "loss_iou": 0.2041015625, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 396426492, + "step": 4088 + }, + { + "epoch": 0.39978490418459134, + "grad_norm": 2.5450664272332686, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 396522048, + "step": 4089 + }, + { + "epoch": 0.39978490418459134, + "loss": 0.05675851181149483, + "loss_ce": 0.005946745164692402, + "loss_iou": 0.201171875, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 396522048, + "step": 4089 + }, + { + "epoch": 0.3998826750097771, + "grad_norm": 3.835838132763431, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 396619188, + "step": 4090 + }, + { + "epoch": 0.3998826750097771, + "loss": 0.07997997850179672, + "loss_ce": 0.008187375962734222, + "loss_iou": 0.330078125, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 396619188, + "step": 4090 + }, + { + "epoch": 0.39998044583496284, + "grad_norm": 43.355606052559786, + "learning_rate": 5e-05, + "loss": 0.0942, + "num_input_tokens_seen": 396716336, + "step": 4091 + }, + { + "epoch": 0.39998044583496284, + "loss": 0.07166904211044312, + "loss_ce": 0.0028518959879875183, + "loss_iou": 0.35546875, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 396716336, + "step": 4091 + }, + { + "epoch": 0.4000782166601486, + "grad_norm": 24.075279153270184, + "learning_rate": 5e-05, + "loss": 0.0631, + "num_input_tokens_seen": 396812808, + "step": 4092 + }, + { + "epoch": 0.4000782166601486, + "loss": 0.05926433205604553, + "loss_ce": 0.008505968376994133, + "loss_iou": 0.232421875, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 396812808, + "step": 4092 + }, + { + "epoch": 0.4001759874853344, + "grad_norm": 5.822565892669082, + "learning_rate": 5e-05, + "loss": 0.1043, + "num_input_tokens_seen": 396909992, + "step": 4093 + }, + { + "epoch": 0.4001759874853344, + "loss": 0.10832350701093674, + "loss_ce": 0.007478172890841961, + "loss_iou": 0.28125, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 396909992, + "step": 4093 + }, + { + "epoch": 0.40027375831052014, + "grad_norm": 1.3316303176436155, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 397007404, + "step": 4094 + }, + { + "epoch": 0.40027375831052014, + "loss": 0.0713040828704834, + "loss_ce": 0.008935695514082909, + "loss_iou": 0.25, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 397007404, + "step": 4094 + }, + { + "epoch": 0.4003715291357059, + "grad_norm": 12.875148866110509, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 397104360, + "step": 4095 + }, + { + "epoch": 0.4003715291357059, + "loss": 0.0696449726819992, + "loss_ce": 0.0024300115182995796, + "loss_iou": 0.3671875, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 397104360, + "step": 4095 + }, + { + "epoch": 0.40046929996089164, + "grad_norm": 10.049199059891963, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 397201844, + "step": 4096 + }, + { + "epoch": 0.40046929996089164, + "loss": 0.08670634776353836, + "loss_ce": 0.007055466063320637, + "loss_iou": 0.302734375, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 397201844, + "step": 4096 + }, + { + "epoch": 0.40056707078607745, + "grad_norm": 5.761575120960857, + "learning_rate": 5e-05, + "loss": 0.0775, + "num_input_tokens_seen": 397298632, + "step": 4097 + }, + { + "epoch": 0.40056707078607745, + "loss": 0.09205276519060135, + "loss_ce": 0.007992098107933998, + "loss_iou": 0.283203125, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 397298632, + "step": 4097 + }, + { + "epoch": 0.4006648416112632, + "grad_norm": 7.068252430949678, + "learning_rate": 5e-05, + "loss": 0.1006, + "num_input_tokens_seen": 397396344, + "step": 4098 + }, + { + "epoch": 0.4006648416112632, + "loss": 0.08801223337650299, + "loss_ce": 0.004302519373595715, + "loss_iou": 0.341796875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 397396344, + "step": 4098 + }, + { + "epoch": 0.40076261243644895, + "grad_norm": 9.073937474134706, + "learning_rate": 5e-05, + "loss": 0.088, + "num_input_tokens_seen": 397492516, + "step": 4099 + }, + { + "epoch": 0.40076261243644895, + "loss": 0.11000145226716995, + "loss_ce": 0.0069435895420610905, + "loss_iou": 0.376953125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 397492516, + "step": 4099 + }, + { + "epoch": 0.4008603832616347, + "grad_norm": 8.859831756105951, + "learning_rate": 5e-05, + "loss": 0.1114, + "num_input_tokens_seen": 397589640, + "step": 4100 + }, + { + "epoch": 0.4008603832616347, + "loss": 0.11886243522167206, + "loss_ce": 0.008175172843039036, + "loss_iou": 0.271484375, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 397589640, + "step": 4100 + }, + { + "epoch": 0.4009581540868205, + "grad_norm": 3.4111048002336757, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 397685760, + "step": 4101 + }, + { + "epoch": 0.4009581540868205, + "loss": 0.07123154401779175, + "loss_ce": 0.0072552538476884365, + "loss_iou": 0.2099609375, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 397685760, + "step": 4101 + }, + { + "epoch": 0.40105592491200626, + "grad_norm": 2.835694877018141, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 397782220, + "step": 4102 + }, + { + "epoch": 0.40105592491200626, + "loss": 0.08423914015293121, + "loss_ce": 0.0070983292534947395, + "loss_iou": 0.228515625, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 397782220, + "step": 4102 + }, + { + "epoch": 0.401153695737192, + "grad_norm": 9.13106194252592, + "learning_rate": 5e-05, + "loss": 0.0524, + "num_input_tokens_seen": 397878944, + "step": 4103 + }, + { + "epoch": 0.401153695737192, + "loss": 0.0721832811832428, + "loss_ce": 0.003640803275629878, + "loss_iou": 0.41796875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 397878944, + "step": 4103 + }, + { + "epoch": 0.4012514665623778, + "grad_norm": 16.766779328244663, + "learning_rate": 5e-05, + "loss": 0.1059, + "num_input_tokens_seen": 397976216, + "step": 4104 + }, + { + "epoch": 0.4012514665623778, + "loss": 0.09433643519878387, + "loss_ce": 0.003020212287083268, + "loss_iou": 0.43359375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 397976216, + "step": 4104 + }, + { + "epoch": 0.40134923738756356, + "grad_norm": 10.88556592394656, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 398072528, + "step": 4105 + }, + { + "epoch": 0.40134923738756356, + "loss": 0.0574590265750885, + "loss_ce": 0.003534469986334443, + "loss_iou": 0.1650390625, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 398072528, + "step": 4105 + }, + { + "epoch": 0.4014470082127493, + "grad_norm": 3.6191335564068945, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 398169536, + "step": 4106 + }, + { + "epoch": 0.4014470082127493, + "loss": 0.06071126088500023, + "loss_ce": 0.0038799000903964043, + "loss_iou": 0.29296875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 398169536, + "step": 4106 + }, + { + "epoch": 0.40154477903793506, + "grad_norm": 5.453862324535017, + "learning_rate": 5e-05, + "loss": 0.0977, + "num_input_tokens_seen": 398266844, + "step": 4107 + }, + { + "epoch": 0.40154477903793506, + "loss": 0.07535649091005325, + "loss_ce": 0.0013361078454181552, + "loss_iou": 0.345703125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 398266844, + "step": 4107 + }, + { + "epoch": 0.40164254986312087, + "grad_norm": 9.782988375430811, + "learning_rate": 5e-05, + "loss": 0.1383, + "num_input_tokens_seen": 398363756, + "step": 4108 + }, + { + "epoch": 0.40164254986312087, + "loss": 0.13751938939094543, + "loss_ce": 0.012153184041380882, + "loss_iou": 0.384765625, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 398363756, + "step": 4108 + }, + { + "epoch": 0.4017403206883066, + "grad_norm": 13.479482815239804, + "learning_rate": 5e-05, + "loss": 0.0691, + "num_input_tokens_seen": 398460656, + "step": 4109 + }, + { + "epoch": 0.4017403206883066, + "loss": 0.08320797979831696, + "loss_ce": 0.008012671023607254, + "loss_iou": 0.3359375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 398460656, + "step": 4109 + }, + { + "epoch": 0.40183809151349237, + "grad_norm": 4.66146445978482, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 398558068, + "step": 4110 + }, + { + "epoch": 0.40183809151349237, + "loss": 0.0883125513792038, + "loss_ce": 0.002497125416994095, + "loss_iou": 0.392578125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 398558068, + "step": 4110 + }, + { + "epoch": 0.4019358623386781, + "grad_norm": 8.73795816312953, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 398655708, + "step": 4111 + }, + { + "epoch": 0.4019358623386781, + "loss": 0.048799820244312286, + "loss_ce": 0.0036490594502538443, + "loss_iou": 0.328125, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 398655708, + "step": 4111 + }, + { + "epoch": 0.4020336331638639, + "grad_norm": 5.042311370178219, + "learning_rate": 5e-05, + "loss": 0.1064, + "num_input_tokens_seen": 398752976, + "step": 4112 + }, + { + "epoch": 0.4020336331638639, + "loss": 0.10794441401958466, + "loss_ce": 0.004459307063370943, + "loss_iou": 0.4140625, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 398752976, + "step": 4112 + }, + { + "epoch": 0.4021314039890497, + "grad_norm": 14.515249965421534, + "learning_rate": 5e-05, + "loss": 0.1008, + "num_input_tokens_seen": 398850460, + "step": 4113 + }, + { + "epoch": 0.4021314039890497, + "loss": 0.09027864784002304, + "loss_ce": 0.006233240477740765, + "loss_iou": 0.333984375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 398850460, + "step": 4113 + }, + { + "epoch": 0.4022291748142354, + "grad_norm": 18.641134187205758, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 398947104, + "step": 4114 + }, + { + "epoch": 0.4022291748142354, + "loss": 0.06441187858581543, + "loss_ce": 0.0031325812451541424, + "loss_iou": 0.298828125, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 398947104, + "step": 4114 + }, + { + "epoch": 0.4023269456394212, + "grad_norm": 5.268129566057821, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 399044824, + "step": 4115 + }, + { + "epoch": 0.4023269456394212, + "loss": 0.07922407984733582, + "loss_ce": 0.003189539536833763, + "loss_iou": 0.31640625, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 399044824, + "step": 4115 + }, + { + "epoch": 0.402424716464607, + "grad_norm": 3.166191959901668, + "learning_rate": 5e-05, + "loss": 0.0535, + "num_input_tokens_seen": 399141844, + "step": 4116 + }, + { + "epoch": 0.402424716464607, + "loss": 0.04915160685777664, + "loss_ce": 0.0006896904669702053, + "loss_iou": 0.328125, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 399141844, + "step": 4116 + }, + { + "epoch": 0.40252248728979273, + "grad_norm": 14.841875082334042, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 399240108, + "step": 4117 + }, + { + "epoch": 0.40252248728979273, + "loss": 0.0958859995007515, + "loss_ce": 0.0024106591008603573, + "loss_iou": 0.3203125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 399240108, + "step": 4117 + }, + { + "epoch": 0.4026202581149785, + "grad_norm": 6.292832187791736, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 399336896, + "step": 4118 + }, + { + "epoch": 0.4026202581149785, + "loss": 0.08932371437549591, + "loss_ce": 0.006041240878403187, + "loss_iou": 0.3515625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 399336896, + "step": 4118 + }, + { + "epoch": 0.40271802894016423, + "grad_norm": 3.237211414284086, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 399433960, + "step": 4119 + }, + { + "epoch": 0.40271802894016423, + "loss": 0.10446327924728394, + "loss_ce": 0.006105125416070223, + "loss_iou": 0.33203125, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 399433960, + "step": 4119 + }, + { + "epoch": 0.40281579976535004, + "grad_norm": 7.5624649197573515, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 399530664, + "step": 4120 + }, + { + "epoch": 0.40281579976535004, + "loss": 0.09201911091804504, + "loss_ce": 0.004128491971641779, + "loss_iou": 0.376953125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 399530664, + "step": 4120 + }, + { + "epoch": 0.4029135705905358, + "grad_norm": 18.0537553466589, + "learning_rate": 5e-05, + "loss": 0.1057, + "num_input_tokens_seen": 399627392, + "step": 4121 + }, + { + "epoch": 0.4029135705905358, + "loss": 0.1529151052236557, + "loss_ce": 0.004508126527070999, + "loss_iou": 0.37109375, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 399627392, + "step": 4121 + }, + { + "epoch": 0.40301134141572154, + "grad_norm": 7.092209407850071, + "learning_rate": 5e-05, + "loss": 0.1156, + "num_input_tokens_seen": 399724100, + "step": 4122 + }, + { + "epoch": 0.40301134141572154, + "loss": 0.09292484074831009, + "loss_ce": 0.0013415887951850891, + "loss_iou": 0.431640625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 399724100, + "step": 4122 + }, + { + "epoch": 0.4031091122409073, + "grad_norm": 14.622457245616484, + "learning_rate": 5e-05, + "loss": 0.1037, + "num_input_tokens_seen": 399821684, + "step": 4123 + }, + { + "epoch": 0.4031091122409073, + "loss": 0.0650743842124939, + "loss_ce": 0.004374918527901173, + "loss_iou": 0.328125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 399821684, + "step": 4123 + }, + { + "epoch": 0.4032068830660931, + "grad_norm": 4.931225349648434, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 399919504, + "step": 4124 + }, + { + "epoch": 0.4032068830660931, + "loss": 0.12251285463571548, + "loss_ce": 0.009628331288695335, + "loss_iou": 0.26953125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 399919504, + "step": 4124 + }, + { + "epoch": 0.40330465389127884, + "grad_norm": 4.820201231773366, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 400016136, + "step": 4125 + }, + { + "epoch": 0.40330465389127884, + "loss": 0.09175817668437958, + "loss_ce": 0.0075372857972979546, + "loss_iou": 0.2333984375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 400016136, + "step": 4125 + }, + { + "epoch": 0.4034024247164646, + "grad_norm": 3.4078701075115, + "learning_rate": 5e-05, + "loss": 0.0539, + "num_input_tokens_seen": 400112432, + "step": 4126 + }, + { + "epoch": 0.4034024247164646, + "loss": 0.06645616143941879, + "loss_ce": 0.008228624239563942, + "loss_iou": 0.35546875, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 400112432, + "step": 4126 + }, + { + "epoch": 0.4035001955416504, + "grad_norm": 6.94616693821432, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 400209492, + "step": 4127 + }, + { + "epoch": 0.4035001955416504, + "loss": 0.07789044082164764, + "loss_ce": 0.004274417646229267, + "loss_iou": 0.248046875, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 400209492, + "step": 4127 + }, + { + "epoch": 0.40359796636683615, + "grad_norm": 6.400413594975782, + "learning_rate": 5e-05, + "loss": 0.0871, + "num_input_tokens_seen": 400306196, + "step": 4128 + }, + { + "epoch": 0.40359796636683615, + "loss": 0.06385770440101624, + "loss_ce": 0.003814372234046459, + "loss_iou": 0.296875, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 400306196, + "step": 4128 + }, + { + "epoch": 0.4036957371920219, + "grad_norm": 3.310335233898763, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 400404168, + "step": 4129 + }, + { + "epoch": 0.4036957371920219, + "loss": 0.06019911915063858, + "loss_ce": 0.008136127144098282, + "loss_iou": 0.294921875, + "loss_num": 0.01043701171875, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 400404168, + "step": 4129 + }, + { + "epoch": 0.40379350801720765, + "grad_norm": 3.7755526327080378, + "learning_rate": 5e-05, + "loss": 0.0651, + "num_input_tokens_seen": 400501260, + "step": 4130 + }, + { + "epoch": 0.40379350801720765, + "loss": 0.03627537935972214, + "loss_ce": 0.0033011334016919136, + "loss_iou": 0.36328125, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 400501260, + "step": 4130 + }, + { + "epoch": 0.40389127884239345, + "grad_norm": 12.372922662269236, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 400598928, + "step": 4131 + }, + { + "epoch": 0.40389127884239345, + "loss": 0.05739470571279526, + "loss_ce": 0.003302300814539194, + "loss_iou": 0.3515625, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 400598928, + "step": 4131 + }, + { + "epoch": 0.4039890496675792, + "grad_norm": 5.00735523470234, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 400696276, + "step": 4132 + }, + { + "epoch": 0.4039890496675792, + "loss": 0.07605469226837158, + "loss_ce": 0.00537598691880703, + "loss_iou": 0.39453125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 400696276, + "step": 4132 + }, + { + "epoch": 0.40408682049276495, + "grad_norm": 3.457683969223054, + "learning_rate": 5e-05, + "loss": 0.0879, + "num_input_tokens_seen": 400793120, + "step": 4133 + }, + { + "epoch": 0.40408682049276495, + "loss": 0.0955679714679718, + "loss_ce": 0.00212314841337502, + "loss_iou": 0.314453125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 400793120, + "step": 4133 + }, + { + "epoch": 0.4041845913179507, + "grad_norm": 24.38291921572809, + "learning_rate": 5e-05, + "loss": 0.1025, + "num_input_tokens_seen": 400890196, + "step": 4134 + }, + { + "epoch": 0.4041845913179507, + "loss": 0.0934889167547226, + "loss_ce": 0.008527981117367744, + "loss_iou": 0.400390625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 400890196, + "step": 4134 + }, + { + "epoch": 0.4042823621431365, + "grad_norm": 5.851810461851548, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 400985620, + "step": 4135 + }, + { + "epoch": 0.4042823621431365, + "loss": 0.06753405928611755, + "loss_ce": 0.006270020734518766, + "loss_iou": 0.265625, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 400985620, + "step": 4135 + }, + { + "epoch": 0.40438013296832226, + "grad_norm": 25.087748836524714, + "learning_rate": 5e-05, + "loss": 0.1212, + "num_input_tokens_seen": 401082288, + "step": 4136 + }, + { + "epoch": 0.40438013296832226, + "loss": 0.11729717254638672, + "loss_ce": 0.0035123785492032766, + "loss_iou": 0.36328125, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 401082288, + "step": 4136 + }, + { + "epoch": 0.404477903793508, + "grad_norm": 13.3378670844843, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 401179808, + "step": 4137 + }, + { + "epoch": 0.404477903793508, + "loss": 0.08185674250125885, + "loss_ce": 0.0036249361000955105, + "loss_iou": 0.328125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 401179808, + "step": 4137 + }, + { + "epoch": 0.40457567461869376, + "grad_norm": 4.766465388082907, + "learning_rate": 5e-05, + "loss": 0.0718, + "num_input_tokens_seen": 401277196, + "step": 4138 + }, + { + "epoch": 0.40457567461869376, + "loss": 0.07847195118665695, + "loss_ce": 0.008800322189927101, + "loss_iou": 0.294921875, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 401277196, + "step": 4138 + }, + { + "epoch": 0.40467344544387956, + "grad_norm": 10.357341884170035, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 401374340, + "step": 4139 + }, + { + "epoch": 0.40467344544387956, + "loss": 0.05765284597873688, + "loss_ce": 0.006062882952392101, + "loss_iou": 0.3359375, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 401374340, + "step": 4139 + }, + { + "epoch": 0.4047712162690653, + "grad_norm": 4.754034329850284, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 401471888, + "step": 4140 + }, + { + "epoch": 0.4047712162690653, + "loss": 0.10947446525096893, + "loss_ce": 0.0034716499503701925, + "loss_iou": 0.396484375, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 401471888, + "step": 4140 + }, + { + "epoch": 0.40486898709425106, + "grad_norm": 7.078906844661817, + "learning_rate": 5e-05, + "loss": 0.0892, + "num_input_tokens_seen": 401569344, + "step": 4141 + }, + { + "epoch": 0.40486898709425106, + "loss": 0.058042220771312714, + "loss_ce": 0.0035073093604296446, + "loss_iou": 0.37890625, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 401569344, + "step": 4141 + }, + { + "epoch": 0.4049667579194368, + "grad_norm": 5.563073582849188, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 401665520, + "step": 4142 + }, + { + "epoch": 0.4049667579194368, + "loss": 0.07461651414632797, + "loss_ce": 0.009080015122890472, + "loss_iou": 0.326171875, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 401665520, + "step": 4142 + }, + { + "epoch": 0.4050645287446226, + "grad_norm": 13.990981085561346, + "learning_rate": 5e-05, + "loss": 0.1209, + "num_input_tokens_seen": 401762480, + "step": 4143 + }, + { + "epoch": 0.4050645287446226, + "loss": 0.11139436811208725, + "loss_ce": 0.003148518968373537, + "loss_iou": 0.26953125, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 401762480, + "step": 4143 + }, + { + "epoch": 0.40516229956980837, + "grad_norm": 133.20567170488042, + "learning_rate": 5e-05, + "loss": 0.1166, + "num_input_tokens_seen": 401858676, + "step": 4144 + }, + { + "epoch": 0.40516229956980837, + "loss": 0.10000208020210266, + "loss_ce": 0.007900030352175236, + "loss_iou": 0.3203125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 401858676, + "step": 4144 + }, + { + "epoch": 0.4052600703949941, + "grad_norm": 16.8576025236429, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 401956324, + "step": 4145 + }, + { + "epoch": 0.4052600703949941, + "loss": 0.0911359190940857, + "loss_ce": 0.004969542846083641, + "loss_iou": 0.333984375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 401956324, + "step": 4145 + }, + { + "epoch": 0.40535784122017987, + "grad_norm": 25.20956125600009, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 402052384, + "step": 4146 + }, + { + "epoch": 0.40535784122017987, + "loss": 0.07231304049491882, + "loss_ce": 0.006280636414885521, + "loss_iou": 0.158203125, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 402052384, + "step": 4146 + }, + { + "epoch": 0.4054556120453657, + "grad_norm": 10.109817538392479, + "learning_rate": 5e-05, + "loss": 0.1065, + "num_input_tokens_seen": 402149272, + "step": 4147 + }, + { + "epoch": 0.4054556120453657, + "loss": 0.12745097279548645, + "loss_ce": 0.00837139505892992, + "loss_iou": 0.2578125, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 402149272, + "step": 4147 + }, + { + "epoch": 0.4055533828705514, + "grad_norm": 14.042529006004978, + "learning_rate": 5e-05, + "loss": 0.1531, + "num_input_tokens_seen": 402246920, + "step": 4148 + }, + { + "epoch": 0.4055533828705514, + "loss": 0.19892048835754395, + "loss_ce": 0.010504961013793945, + "loss_iou": 0.341796875, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 402246920, + "step": 4148 + }, + { + "epoch": 0.4056511536957372, + "grad_norm": 12.355083782529732, + "learning_rate": 5e-05, + "loss": 0.0958, + "num_input_tokens_seen": 402343824, + "step": 4149 + }, + { + "epoch": 0.4056511536957372, + "loss": 0.09354617446660995, + "loss_ce": 0.012995025143027306, + "loss_iou": 0.3203125, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 402343824, + "step": 4149 + }, + { + "epoch": 0.405748924520923, + "grad_norm": 8.192462642486667, + "learning_rate": 5e-05, + "loss": 0.1025, + "num_input_tokens_seen": 402441588, + "step": 4150 + }, + { + "epoch": 0.405748924520923, + "loss": 0.09624495357275009, + "loss_ce": 0.006858963519334793, + "loss_iou": 0.333984375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 402441588, + "step": 4150 + }, + { + "epoch": 0.40584669534610873, + "grad_norm": 13.956545820300866, + "learning_rate": 5e-05, + "loss": 0.1027, + "num_input_tokens_seen": 402539296, + "step": 4151 + }, + { + "epoch": 0.40584669534610873, + "loss": 0.11466769129037857, + "loss_ce": 0.007093227002769709, + "loss_iou": 0.28515625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 402539296, + "step": 4151 + }, + { + "epoch": 0.4059444661712945, + "grad_norm": 11.299630402965892, + "learning_rate": 5e-05, + "loss": 0.1003, + "num_input_tokens_seen": 402635980, + "step": 4152 + }, + { + "epoch": 0.4059444661712945, + "loss": 0.1103997752070427, + "loss_ce": 0.015245972201228142, + "loss_iou": 0.298828125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 402635980, + "step": 4152 + }, + { + "epoch": 0.40604223699648023, + "grad_norm": 10.131885839370593, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 402732636, + "step": 4153 + }, + { + "epoch": 0.40604223699648023, + "loss": 0.07078340649604797, + "loss_ce": 0.0068643358536064625, + "loss_iou": 0.392578125, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 402732636, + "step": 4153 + }, + { + "epoch": 0.40614000782166604, + "grad_norm": 4.126050141010114, + "learning_rate": 5e-05, + "loss": 0.0984, + "num_input_tokens_seen": 402830260, + "step": 4154 + }, + { + "epoch": 0.40614000782166604, + "loss": 0.08930788189172745, + "loss_ce": 0.007085897959768772, + "loss_iou": 0.337890625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 402830260, + "step": 4154 + }, + { + "epoch": 0.4062377786468518, + "grad_norm": 4.607300965349007, + "learning_rate": 5e-05, + "loss": 0.0911, + "num_input_tokens_seen": 402927796, + "step": 4155 + }, + { + "epoch": 0.4062377786468518, + "loss": 0.09571324288845062, + "loss_ce": 0.006540884729474783, + "loss_iou": 0.419921875, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 402927796, + "step": 4155 + }, + { + "epoch": 0.40633554947203754, + "grad_norm": 20.946371828973266, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 403025276, + "step": 4156 + }, + { + "epoch": 0.40633554947203754, + "loss": 0.07774192094802856, + "loss_ce": 0.0026381572242826223, + "loss_iou": 0.38671875, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 403025276, + "step": 4156 + }, + { + "epoch": 0.4064333202972233, + "grad_norm": 28.262642122048476, + "learning_rate": 5e-05, + "loss": 0.1077, + "num_input_tokens_seen": 403123392, + "step": 4157 + }, + { + "epoch": 0.4064333202972233, + "loss": 0.10376887023448944, + "loss_ce": 0.0033660384360700846, + "loss_iou": 0.306640625, + "loss_num": 0.02001953125, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 403123392, + "step": 4157 + }, + { + "epoch": 0.4065310911224091, + "grad_norm": 5.326004744558147, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 403219932, + "step": 4158 + }, + { + "epoch": 0.4065310911224091, + "loss": 0.06708909571170807, + "loss_ce": 0.007671365514397621, + "loss_iou": 0.24609375, + "loss_num": 0.01190185546875, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 403219932, + "step": 4158 + }, + { + "epoch": 0.40662886194759484, + "grad_norm": 6.3816678994012435, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 403317412, + "step": 4159 + }, + { + "epoch": 0.40662886194759484, + "loss": 0.0795687586069107, + "loss_ce": 0.002847562776878476, + "loss_iou": 0.328125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 403317412, + "step": 4159 + }, + { + "epoch": 0.4067266327727806, + "grad_norm": 3.3751269036900853, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 403412520, + "step": 4160 + }, + { + "epoch": 0.4067266327727806, + "loss": 0.07671542465686798, + "loss_ce": 0.015935853123664856, + "loss_iou": 0.208984375, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 403412520, + "step": 4160 + }, + { + "epoch": 0.40682440359796634, + "grad_norm": 11.278020937753949, + "learning_rate": 5e-05, + "loss": 0.1106, + "num_input_tokens_seen": 403510220, + "step": 4161 + }, + { + "epoch": 0.40682440359796634, + "loss": 0.12351511418819427, + "loss_ce": 0.007700898684561253, + "loss_iou": 0.310546875, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 403510220, + "step": 4161 + }, + { + "epoch": 0.40692217442315215, + "grad_norm": 3.5372907081666156, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 403607624, + "step": 4162 + }, + { + "epoch": 0.40692217442315215, + "loss": 0.08035066723823547, + "loss_ce": 0.00603273743763566, + "loss_iou": 0.294921875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 403607624, + "step": 4162 + }, + { + "epoch": 0.4070199452483379, + "grad_norm": 11.421324909582037, + "learning_rate": 5e-05, + "loss": 0.1237, + "num_input_tokens_seen": 403703676, + "step": 4163 + }, + { + "epoch": 0.4070199452483379, + "loss": 0.12773242592811584, + "loss_ce": 0.006249566562473774, + "loss_iou": 0.25390625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 403703676, + "step": 4163 + }, + { + "epoch": 0.40711771607352365, + "grad_norm": 8.130030425957857, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 403801056, + "step": 4164 + }, + { + "epoch": 0.40711771607352365, + "loss": 0.08185143023729324, + "loss_ce": 0.0034517706371843815, + "loss_iou": 0.30078125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 403801056, + "step": 4164 + }, + { + "epoch": 0.4072154868987094, + "grad_norm": 7.723946425752042, + "learning_rate": 5e-05, + "loss": 0.0656, + "num_input_tokens_seen": 403897888, + "step": 4165 + }, + { + "epoch": 0.4072154868987094, + "loss": 0.05799391493201256, + "loss_ce": 0.004084615968167782, + "loss_iou": 0.35546875, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 403897888, + "step": 4165 + }, + { + "epoch": 0.4073132577238952, + "grad_norm": 18.07377994125907, + "learning_rate": 5e-05, + "loss": 0.1414, + "num_input_tokens_seen": 403994568, + "step": 4166 + }, + { + "epoch": 0.4073132577238952, + "loss": 0.19558683037757874, + "loss_ce": 0.007552769035100937, + "loss_iou": 0.2373046875, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 403994568, + "step": 4166 + }, + { + "epoch": 0.40741102854908096, + "grad_norm": 36.42273071182707, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 404092172, + "step": 4167 + }, + { + "epoch": 0.40741102854908096, + "loss": 0.06803935766220093, + "loss_ce": 0.005112108774483204, + "loss_iou": 0.33984375, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 404092172, + "step": 4167 + }, + { + "epoch": 0.4075087993742667, + "grad_norm": 16.225575800295314, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 404190404, + "step": 4168 + }, + { + "epoch": 0.4075087993742667, + "loss": 0.08428020775318146, + "loss_ce": 0.006063654087483883, + "loss_iou": 0.404296875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 404190404, + "step": 4168 + }, + { + "epoch": 0.40760657019945246, + "grad_norm": 7.587051115643426, + "learning_rate": 5e-05, + "loss": 0.1107, + "num_input_tokens_seen": 404287472, + "step": 4169 + }, + { + "epoch": 0.40760657019945246, + "loss": 0.11611098796129227, + "loss_ce": 0.0036842282861471176, + "loss_iou": 0.3828125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 404287472, + "step": 4169 + }, + { + "epoch": 0.40770434102463826, + "grad_norm": 3.2215795891293326, + "learning_rate": 5e-05, + "loss": 0.1071, + "num_input_tokens_seen": 404384364, + "step": 4170 + }, + { + "epoch": 0.40770434102463826, + "loss": 0.07591372728347778, + "loss_ce": 0.007066069170832634, + "loss_iou": 0.326171875, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 404384364, + "step": 4170 + }, + { + "epoch": 0.407802111849824, + "grad_norm": 4.9522914620153, + "learning_rate": 5e-05, + "loss": 0.1055, + "num_input_tokens_seen": 404480608, + "step": 4171 + }, + { + "epoch": 0.407802111849824, + "loss": 0.08144249022006989, + "loss_ce": 0.008322374895215034, + "loss_iou": 0.1875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 404480608, + "step": 4171 + }, + { + "epoch": 0.40789988267500976, + "grad_norm": 6.578861228393436, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 404577332, + "step": 4172 + }, + { + "epoch": 0.40789988267500976, + "loss": 0.07574784755706787, + "loss_ce": 0.006137250456959009, + "loss_iou": 0.33203125, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 404577332, + "step": 4172 + }, + { + "epoch": 0.40799765350019557, + "grad_norm": 23.900363827407467, + "learning_rate": 5e-05, + "loss": 0.0945, + "num_input_tokens_seen": 404673312, + "step": 4173 + }, + { + "epoch": 0.40799765350019557, + "loss": 0.09875404834747314, + "loss_ce": 0.0042258454486727715, + "loss_iou": 0.2470703125, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 404673312, + "step": 4173 + }, + { + "epoch": 0.4080954243253813, + "grad_norm": 6.072482856988648, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 404769348, + "step": 4174 + }, + { + "epoch": 0.4080954243253813, + "loss": 0.08892928063869476, + "loss_ce": 0.0066844099201262, + "loss_iou": 0.2734375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 404769348, + "step": 4174 + }, + { + "epoch": 0.40819319515056707, + "grad_norm": 9.38691154237186, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 404866940, + "step": 4175 + }, + { + "epoch": 0.40819319515056707, + "loss": 0.05865858495235443, + "loss_ce": 0.004337298683822155, + "loss_iou": 0.408203125, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 404866940, + "step": 4175 + }, + { + "epoch": 0.4082909659757528, + "grad_norm": 23.954551737724547, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 404964508, + "step": 4176 + }, + { + "epoch": 0.4082909659757528, + "loss": 0.06894854456186295, + "loss_ce": 0.0052431002259254456, + "loss_iou": 0.341796875, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 404964508, + "step": 4176 + }, + { + "epoch": 0.4083887368009386, + "grad_norm": 6.54659984281483, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 405061412, + "step": 4177 + }, + { + "epoch": 0.4083887368009386, + "loss": 0.08375444263219833, + "loss_ce": 0.006773850880563259, + "loss_iou": 0.302734375, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 405061412, + "step": 4177 + }, + { + "epoch": 0.4084865076261244, + "grad_norm": 2.496032702806109, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 405158040, + "step": 4178 + }, + { + "epoch": 0.4084865076261244, + "loss": 0.043054983019828796, + "loss_ce": 0.003786490298807621, + "loss_iou": 0.23046875, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 405158040, + "step": 4178 + }, + { + "epoch": 0.4085842784513101, + "grad_norm": 7.362859261415488, + "learning_rate": 5e-05, + "loss": 0.0812, + "num_input_tokens_seen": 405255424, + "step": 4179 + }, + { + "epoch": 0.4085842784513101, + "loss": 0.08854404836893082, + "loss_ce": 0.009965099394321442, + "loss_iou": 0.1611328125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 405255424, + "step": 4179 + }, + { + "epoch": 0.4086820492764959, + "grad_norm": 2.28356924558875, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 405353140, + "step": 4180 + }, + { + "epoch": 0.4086820492764959, + "loss": 0.04745262861251831, + "loss_ce": 0.0038430108688771725, + "loss_iou": 0.2578125, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 405353140, + "step": 4180 + }, + { + "epoch": 0.4087798201016817, + "grad_norm": 3.54186327920042, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 405449652, + "step": 4181 + }, + { + "epoch": 0.4087798201016817, + "loss": 0.09015588462352753, + "loss_ce": 0.0038521753158420324, + "loss_iou": 0.2412109375, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 405449652, + "step": 4181 + }, + { + "epoch": 0.40887759092686743, + "grad_norm": 4.987400253174512, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 405546788, + "step": 4182 + }, + { + "epoch": 0.40887759092686743, + "loss": 0.07975616306066513, + "loss_ce": 0.007833864539861679, + "loss_iou": 0.318359375, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 405546788, + "step": 4182 + }, + { + "epoch": 0.4089753617520532, + "grad_norm": 6.520110544684352, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 405642448, + "step": 4183 + }, + { + "epoch": 0.4089753617520532, + "loss": 0.07485131174325943, + "loss_ce": 0.005339898634701967, + "loss_iou": 0.228515625, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 405642448, + "step": 4183 + }, + { + "epoch": 0.40907313257723893, + "grad_norm": 3.05940862277973, + "learning_rate": 5e-05, + "loss": 0.1054, + "num_input_tokens_seen": 405738368, + "step": 4184 + }, + { + "epoch": 0.40907313257723893, + "loss": 0.11486394703388214, + "loss_ce": 0.008021906949579716, + "loss_iou": 0.322265625, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 405738368, + "step": 4184 + }, + { + "epoch": 0.40917090340242473, + "grad_norm": 3.629049175401973, + "learning_rate": 5e-05, + "loss": 0.0658, + "num_input_tokens_seen": 405835432, + "step": 4185 + }, + { + "epoch": 0.40917090340242473, + "loss": 0.05757825821638107, + "loss_ce": 0.002280409447848797, + "loss_iou": 0.3046875, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 405835432, + "step": 4185 + }, + { + "epoch": 0.4092686742276105, + "grad_norm": 30.366090118034393, + "learning_rate": 5e-05, + "loss": 0.0906, + "num_input_tokens_seen": 405932120, + "step": 4186 + }, + { + "epoch": 0.4092686742276105, + "loss": 0.08413435518741608, + "loss_ce": 0.006329782772809267, + "loss_iou": 0.30859375, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 405932120, + "step": 4186 + }, + { + "epoch": 0.40936644505279624, + "grad_norm": 11.286296462589252, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 406029664, + "step": 4187 + }, + { + "epoch": 0.40936644505279624, + "loss": 0.08648911118507385, + "loss_ce": 0.005129246041178703, + "loss_iou": 0.31640625, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 406029664, + "step": 4187 + }, + { + "epoch": 0.409464215877982, + "grad_norm": 18.6480818016941, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 406124588, + "step": 4188 + }, + { + "epoch": 0.409464215877982, + "loss": 0.07936576008796692, + "loss_ce": 0.004185701720416546, + "loss_iou": 0.32421875, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 406124588, + "step": 4188 + }, + { + "epoch": 0.4095619867031678, + "grad_norm": 28.3288609424014, + "learning_rate": 5e-05, + "loss": 0.1071, + "num_input_tokens_seen": 406221280, + "step": 4189 + }, + { + "epoch": 0.4095619867031678, + "loss": 0.10231887549161911, + "loss_ce": 0.005974880885332823, + "loss_iou": 0.2734375, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 406221280, + "step": 4189 + }, + { + "epoch": 0.40965975752835354, + "grad_norm": 9.533609275631768, + "learning_rate": 5e-05, + "loss": 0.1117, + "num_input_tokens_seen": 406319300, + "step": 4190 + }, + { + "epoch": 0.40965975752835354, + "loss": 0.15891402959823608, + "loss_ce": 0.004739213269203901, + "loss_iou": 0.33984375, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 406319300, + "step": 4190 + }, + { + "epoch": 0.4097575283535393, + "grad_norm": 14.82385026299871, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 406416536, + "step": 4191 + }, + { + "epoch": 0.4097575283535393, + "loss": 0.09395065158605576, + "loss_ce": 0.007860565558075905, + "loss_iou": 0.4375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 406416536, + "step": 4191 + }, + { + "epoch": 0.40985529917872504, + "grad_norm": 14.263468058564317, + "learning_rate": 5e-05, + "loss": 0.0863, + "num_input_tokens_seen": 406513928, + "step": 4192 + }, + { + "epoch": 0.40985529917872504, + "loss": 0.07707024365663528, + "loss_ce": 0.0049266875721514225, + "loss_iou": 0.341796875, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 406513928, + "step": 4192 + }, + { + "epoch": 0.40995307000391085, + "grad_norm": 4.937930920654116, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 406611524, + "step": 4193 + }, + { + "epoch": 0.40995307000391085, + "loss": 0.0605250708758831, + "loss_ce": 0.007104051299393177, + "loss_iou": 0.4140625, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 406611524, + "step": 4193 + }, + { + "epoch": 0.4100508408290966, + "grad_norm": 6.246144612989661, + "learning_rate": 5e-05, + "loss": 0.0971, + "num_input_tokens_seen": 406710260, + "step": 4194 + }, + { + "epoch": 0.4100508408290966, + "loss": 0.10017631202936172, + "loss_ce": 0.00671622809022665, + "loss_iou": 0.3125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 406710260, + "step": 4194 + }, + { + "epoch": 0.41014861165428235, + "grad_norm": 10.881721629869888, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 406806800, + "step": 4195 + }, + { + "epoch": 0.41014861165428235, + "loss": 0.0532856322824955, + "loss_ce": 0.010744130238890648, + "loss_iou": 0.267578125, + "loss_num": 0.008544921875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 406806800, + "step": 4195 + }, + { + "epoch": 0.41024638247946815, + "grad_norm": 31.731523742892705, + "learning_rate": 5e-05, + "loss": 0.1015, + "num_input_tokens_seen": 406903484, + "step": 4196 + }, + { + "epoch": 0.41024638247946815, + "loss": 0.13528557121753693, + "loss_ce": 0.005646905861794949, + "loss_iou": 0.279296875, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 406903484, + "step": 4196 + }, + { + "epoch": 0.4103441533046539, + "grad_norm": 16.266133460004617, + "learning_rate": 5e-05, + "loss": 0.1059, + "num_input_tokens_seen": 407000800, + "step": 4197 + }, + { + "epoch": 0.4103441533046539, + "loss": 0.07714849710464478, + "loss_ce": 0.004905760753899813, + "loss_iou": 0.345703125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 407000800, + "step": 4197 + }, + { + "epoch": 0.41044192412983965, + "grad_norm": 2.8822206497915523, + "learning_rate": 5e-05, + "loss": 0.0594, + "num_input_tokens_seen": 407098200, + "step": 4198 + }, + { + "epoch": 0.41044192412983965, + "loss": 0.07732710242271423, + "loss_ce": 0.003932327497750521, + "loss_iou": 0.25390625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 407098200, + "step": 4198 + }, + { + "epoch": 0.4105396949550254, + "grad_norm": 10.814615337441696, + "learning_rate": 5e-05, + "loss": 0.1133, + "num_input_tokens_seen": 407194796, + "step": 4199 + }, + { + "epoch": 0.4105396949550254, + "loss": 0.13582533597946167, + "loss_ce": 0.010916887782514095, + "loss_iou": 0.25390625, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 407194796, + "step": 4199 + }, + { + "epoch": 0.4106374657802112, + "grad_norm": 2.018793405831533, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 407291100, + "step": 4200 + }, + { + "epoch": 0.4106374657802112, + "loss": 0.10767990350723267, + "loss_ce": 0.007479255553334951, + "loss_iou": 0.2060546875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 407291100, + "step": 4200 + }, + { + "epoch": 0.41073523660539696, + "grad_norm": 3.805248540887587, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 407388268, + "step": 4201 + }, + { + "epoch": 0.41073523660539696, + "loss": 0.08440603315830231, + "loss_ce": 0.00934805627912283, + "loss_iou": 0.333984375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 407388268, + "step": 4201 + }, + { + "epoch": 0.4108330074305827, + "grad_norm": 12.44039233135128, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 407484808, + "step": 4202 + }, + { + "epoch": 0.4108330074305827, + "loss": 0.09101653099060059, + "loss_ce": 0.005689376965165138, + "loss_iou": 0.3046875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 407484808, + "step": 4202 + }, + { + "epoch": 0.41093077825576846, + "grad_norm": 2.602591747460018, + "learning_rate": 5e-05, + "loss": 0.0651, + "num_input_tokens_seen": 407581420, + "step": 4203 + }, + { + "epoch": 0.41093077825576846, + "loss": 0.08146564662456512, + "loss_ce": 0.008070875890552998, + "loss_iou": 0.29296875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 407581420, + "step": 4203 + }, + { + "epoch": 0.41102854908095426, + "grad_norm": 6.999609433475487, + "learning_rate": 5e-05, + "loss": 0.0934, + "num_input_tokens_seen": 407678492, + "step": 4204 + }, + { + "epoch": 0.41102854908095426, + "loss": 0.09141457080841064, + "loss_ce": 0.006484153680503368, + "loss_iou": 0.32421875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 407678492, + "step": 4204 + }, + { + "epoch": 0.41112631990614, + "grad_norm": 10.738631531727073, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 407774888, + "step": 4205 + }, + { + "epoch": 0.41112631990614, + "loss": 0.08337630331516266, + "loss_ce": 0.004824062809348106, + "loss_iou": 0.361328125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 407774888, + "step": 4205 + }, + { + "epoch": 0.41122409073132576, + "grad_norm": 16.784463260156727, + "learning_rate": 5e-05, + "loss": 0.0827, + "num_input_tokens_seen": 407871852, + "step": 4206 + }, + { + "epoch": 0.41122409073132576, + "loss": 0.07387891411781311, + "loss_ce": 0.005534800700843334, + "loss_iou": 0.181640625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 407871852, + "step": 4206 + }, + { + "epoch": 0.4113218615565115, + "grad_norm": 12.76283417198299, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 407969112, + "step": 4207 + }, + { + "epoch": 0.4113218615565115, + "loss": 0.05723290890455246, + "loss_ce": 0.006146480329334736, + "loss_iou": 0.3828125, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 407969112, + "step": 4207 + }, + { + "epoch": 0.4114196323816973, + "grad_norm": 3.883127943840395, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 408066584, + "step": 4208 + }, + { + "epoch": 0.4114196323816973, + "loss": 0.07332556694746017, + "loss_ce": 0.006339479237794876, + "loss_iou": 0.310546875, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 408066584, + "step": 4208 + }, + { + "epoch": 0.41151740320688307, + "grad_norm": 10.856383865918446, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 408163784, + "step": 4209 + }, + { + "epoch": 0.41151740320688307, + "loss": 0.10466158390045166, + "loss_ce": 0.007539397105574608, + "loss_iou": 0.35546875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 408163784, + "step": 4209 + }, + { + "epoch": 0.4116151740320688, + "grad_norm": 14.909705946477372, + "learning_rate": 5e-05, + "loss": 0.073, + "num_input_tokens_seen": 408260228, + "step": 4210 + }, + { + "epoch": 0.4116151740320688, + "loss": 0.09488999098539352, + "loss_ce": 0.003703470341861248, + "loss_iou": 0.267578125, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 408260228, + "step": 4210 + }, + { + "epoch": 0.41171294485725457, + "grad_norm": 7.5181446071241185, + "learning_rate": 5e-05, + "loss": 0.0817, + "num_input_tokens_seen": 408357196, + "step": 4211 + }, + { + "epoch": 0.41171294485725457, + "loss": 0.04247491434216499, + "loss_ce": 0.008211303502321243, + "loss_iou": 0.2099609375, + "loss_num": 0.006866455078125, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 408357196, + "step": 4211 + }, + { + "epoch": 0.4118107156824404, + "grad_norm": 26.303574506847692, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 408453676, + "step": 4212 + }, + { + "epoch": 0.4118107156824404, + "loss": 0.1059882789850235, + "loss_ce": 0.008087890222668648, + "loss_iou": 0.26171875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 408453676, + "step": 4212 + }, + { + "epoch": 0.4119084865076261, + "grad_norm": 30.889895385652814, + "learning_rate": 5e-05, + "loss": 0.096, + "num_input_tokens_seen": 408550556, + "step": 4213 + }, + { + "epoch": 0.4119084865076261, + "loss": 0.05915495380759239, + "loss_ce": 0.006130661815404892, + "loss_iou": 0.337890625, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 408550556, + "step": 4213 + }, + { + "epoch": 0.4120062573328119, + "grad_norm": 20.125874303682398, + "learning_rate": 5e-05, + "loss": 0.0893, + "num_input_tokens_seen": 408648536, + "step": 4214 + }, + { + "epoch": 0.4120062573328119, + "loss": 0.10036254674196243, + "loss_ce": 0.0071008289232850075, + "loss_iou": 0.4296875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 408648536, + "step": 4214 + }, + { + "epoch": 0.4121040281579976, + "grad_norm": 10.26661765296479, + "learning_rate": 5e-05, + "loss": 0.1317, + "num_input_tokens_seen": 408746280, + "step": 4215 + }, + { + "epoch": 0.4121040281579976, + "loss": 0.14450781047344208, + "loss_ce": 0.00949804112315178, + "loss_iou": 0.4140625, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 408746280, + "step": 4215 + }, + { + "epoch": 0.41220179898318343, + "grad_norm": 6.431387641000738, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 408842928, + "step": 4216 + }, + { + "epoch": 0.41220179898318343, + "loss": 0.0908128023147583, + "loss_ce": 0.004417543299496174, + "loss_iou": 0.29296875, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 408842928, + "step": 4216 + }, + { + "epoch": 0.4122995698083692, + "grad_norm": 6.750323284104429, + "learning_rate": 5e-05, + "loss": 0.1417, + "num_input_tokens_seen": 408939244, + "step": 4217 + }, + { + "epoch": 0.4122995698083692, + "loss": 0.10890959948301315, + "loss_ce": 0.0069351112470030785, + "loss_iou": 0.30078125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 408939244, + "step": 4217 + }, + { + "epoch": 0.41239734063355493, + "grad_norm": 9.30681908595325, + "learning_rate": 5e-05, + "loss": 0.1028, + "num_input_tokens_seen": 409036200, + "step": 4218 + }, + { + "epoch": 0.41239734063355493, + "loss": 0.13396185636520386, + "loss_ce": 0.011265925131738186, + "loss_iou": 0.25390625, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 409036200, + "step": 4218 + }, + { + "epoch": 0.41249511145874074, + "grad_norm": 8.51612141863948, + "learning_rate": 5e-05, + "loss": 0.1117, + "num_input_tokens_seen": 409132936, + "step": 4219 + }, + { + "epoch": 0.41249511145874074, + "loss": 0.1120985746383667, + "loss_ce": 0.0102308988571167, + "loss_iou": 0.25390625, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 409132936, + "step": 4219 + }, + { + "epoch": 0.4125928822839265, + "grad_norm": 8.02047482355716, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 409230180, + "step": 4220 + }, + { + "epoch": 0.4125928822839265, + "loss": 0.11208714544773102, + "loss_ce": 0.00658787414431572, + "loss_iou": 0.259765625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 409230180, + "step": 4220 + }, + { + "epoch": 0.41269065310911224, + "grad_norm": 7.437668459409185, + "learning_rate": 5e-05, + "loss": 0.0581, + "num_input_tokens_seen": 409327700, + "step": 4221 + }, + { + "epoch": 0.41269065310911224, + "loss": 0.06461383402347565, + "loss_ce": 0.003746525850147009, + "loss_iou": 0.435546875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 409327700, + "step": 4221 + }, + { + "epoch": 0.412788423934298, + "grad_norm": 6.754652162080944, + "learning_rate": 5e-05, + "loss": 0.1095, + "num_input_tokens_seen": 409425756, + "step": 4222 + }, + { + "epoch": 0.412788423934298, + "loss": 0.13281577825546265, + "loss_ce": 0.0029482292011380196, + "loss_iou": 0.3046875, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 409425756, + "step": 4222 + }, + { + "epoch": 0.4128861947594838, + "grad_norm": 8.01403620868104, + "learning_rate": 5e-05, + "loss": 0.1169, + "num_input_tokens_seen": 409522664, + "step": 4223 + }, + { + "epoch": 0.4128861947594838, + "loss": 0.09864052385091782, + "loss_ce": 0.005638207774609327, + "loss_iou": 0.294921875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 409522664, + "step": 4223 + }, + { + "epoch": 0.41298396558466954, + "grad_norm": 11.183439599871365, + "learning_rate": 5e-05, + "loss": 0.1188, + "num_input_tokens_seen": 409619668, + "step": 4224 + }, + { + "epoch": 0.41298396558466954, + "loss": 0.09563948214054108, + "loss_ce": 0.002988114021718502, + "loss_iou": 0.34375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 409619668, + "step": 4224 + }, + { + "epoch": 0.4130817364098553, + "grad_norm": 8.917924228654764, + "learning_rate": 5e-05, + "loss": 0.0697, + "num_input_tokens_seen": 409716704, + "step": 4225 + }, + { + "epoch": 0.4130817364098553, + "loss": 0.08446592837572098, + "loss_ce": 0.008126205764710903, + "loss_iou": 0.34765625, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 409716704, + "step": 4225 + }, + { + "epoch": 0.41317950723504104, + "grad_norm": 23.352549630483338, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 409812844, + "step": 4226 + }, + { + "epoch": 0.41317950723504104, + "loss": 0.10960158705711365, + "loss_ce": 0.005845644045621157, + "loss_iou": 0.2080078125, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 409812844, + "step": 4226 + }, + { + "epoch": 0.41327727806022685, + "grad_norm": 10.700564250749958, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 409910952, + "step": 4227 + }, + { + "epoch": 0.41327727806022685, + "loss": 0.0651770606637001, + "loss_ce": 0.004035097546875477, + "loss_iou": 0.34375, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 409910952, + "step": 4227 + }, + { + "epoch": 0.4133750488854126, + "grad_norm": 16.855574557613693, + "learning_rate": 5e-05, + "loss": 0.1236, + "num_input_tokens_seen": 410007228, + "step": 4228 + }, + { + "epoch": 0.4133750488854126, + "loss": 0.1414443999528885, + "loss_ce": 0.009356691502034664, + "loss_iou": 0.28515625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 410007228, + "step": 4228 + }, + { + "epoch": 0.41347281971059835, + "grad_norm": 10.14937395602897, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 410104172, + "step": 4229 + }, + { + "epoch": 0.41347281971059835, + "loss": 0.0644841268658638, + "loss_ce": 0.006298552267253399, + "loss_iou": 0.341796875, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 410104172, + "step": 4229 + }, + { + "epoch": 0.4135705905357841, + "grad_norm": 5.635889927065455, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 410200784, + "step": 4230 + }, + { + "epoch": 0.4135705905357841, + "loss": 0.06987497210502625, + "loss_ce": 0.003041477408260107, + "loss_iou": 0.322265625, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 410200784, + "step": 4230 + }, + { + "epoch": 0.4136683613609699, + "grad_norm": 3.89787089150276, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 410297268, + "step": 4231 + }, + { + "epoch": 0.4136683613609699, + "loss": 0.05223323032259941, + "loss_ce": 0.00476313941180706, + "loss_iou": 0.353515625, + "loss_num": 0.009521484375, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 410297268, + "step": 4231 + }, + { + "epoch": 0.41376613218615566, + "grad_norm": 4.100506404707937, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 410394208, + "step": 4232 + }, + { + "epoch": 0.41376613218615566, + "loss": 0.08287408947944641, + "loss_ce": 0.001468454021960497, + "loss_iou": 0.30859375, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 410394208, + "step": 4232 + }, + { + "epoch": 0.4138639030113414, + "grad_norm": 5.077057586456372, + "learning_rate": 5e-05, + "loss": 0.0976, + "num_input_tokens_seen": 410492388, + "step": 4233 + }, + { + "epoch": 0.4138639030113414, + "loss": 0.08508527278900146, + "loss_ce": 0.002458931878209114, + "loss_iou": 0.330078125, + "loss_num": 0.0166015625, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 410492388, + "step": 4233 + }, + { + "epoch": 0.41396167383652716, + "grad_norm": 14.205875169880606, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 410589264, + "step": 4234 + }, + { + "epoch": 0.41396167383652716, + "loss": 0.056280747056007385, + "loss_ce": 0.005514755845069885, + "loss_iou": 0.37109375, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 410589264, + "step": 4234 + }, + { + "epoch": 0.41405944466171296, + "grad_norm": 7.012685786154849, + "learning_rate": 5e-05, + "loss": 0.1209, + "num_input_tokens_seen": 410685600, + "step": 4235 + }, + { + "epoch": 0.41405944466171296, + "loss": 0.15812313556671143, + "loss_ce": 0.0071831876412034035, + "loss_iou": 0.275390625, + "loss_num": 0.0301513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 410685600, + "step": 4235 + }, + { + "epoch": 0.4141572154868987, + "grad_norm": 4.542621652534861, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 410783240, + "step": 4236 + }, + { + "epoch": 0.4141572154868987, + "loss": 0.08130025863647461, + "loss_ce": 0.003968710545450449, + "loss_iou": 0.4453125, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 410783240, + "step": 4236 + }, + { + "epoch": 0.41425498631208446, + "grad_norm": 19.94922913859009, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 410880840, + "step": 4237 + }, + { + "epoch": 0.41425498631208446, + "loss": 0.08335858583450317, + "loss_ce": 0.007034119218587875, + "loss_iou": 0.439453125, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 410880840, + "step": 4237 + }, + { + "epoch": 0.4143527571372702, + "grad_norm": 18.478296238703173, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 410977764, + "step": 4238 + }, + { + "epoch": 0.4143527571372702, + "loss": 0.06250457465648651, + "loss_ce": 0.0028427140787243843, + "loss_iou": 0.279296875, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 410977764, + "step": 4238 + }, + { + "epoch": 0.414450527962456, + "grad_norm": 13.984933209247773, + "learning_rate": 5e-05, + "loss": 0.1135, + "num_input_tokens_seen": 411074288, + "step": 4239 + }, + { + "epoch": 0.414450527962456, + "loss": 0.13789542019367218, + "loss_ce": 0.005952674429863691, + "loss_iou": 0.30859375, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 411074288, + "step": 4239 + }, + { + "epoch": 0.41454829878764177, + "grad_norm": 14.084331304677155, + "learning_rate": 5e-05, + "loss": 0.0526, + "num_input_tokens_seen": 411170216, + "step": 4240 + }, + { + "epoch": 0.41454829878764177, + "loss": 0.06483831256628036, + "loss_ce": 0.013027088716626167, + "loss_iou": 0.244140625, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 411170216, + "step": 4240 + }, + { + "epoch": 0.4146460696128275, + "grad_norm": 18.828096608974715, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 411267948, + "step": 4241 + }, + { + "epoch": 0.4146460696128275, + "loss": 0.08476017415523529, + "loss_ce": 0.0074286311864852905, + "loss_iou": 0.41015625, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 411267948, + "step": 4241 + }, + { + "epoch": 0.4147438404380133, + "grad_norm": 36.877473255272776, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 411364640, + "step": 4242 + }, + { + "epoch": 0.4147438404380133, + "loss": 0.08312967419624329, + "loss_ce": 0.004516391083598137, + "loss_iou": 0.24609375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 411364640, + "step": 4242 + }, + { + "epoch": 0.4148416112631991, + "grad_norm": 31.95854522127283, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 411461328, + "step": 4243 + }, + { + "epoch": 0.4148416112631991, + "loss": 0.05995556339621544, + "loss_ce": 0.003841366618871689, + "loss_iou": 0.322265625, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 411461328, + "step": 4243 + }, + { + "epoch": 0.4149393820883848, + "grad_norm": 3.4569784326434077, + "learning_rate": 5e-05, + "loss": 0.0999, + "num_input_tokens_seen": 411558308, + "step": 4244 + }, + { + "epoch": 0.4149393820883848, + "loss": 0.11569550633430481, + "loss_ce": 0.004886185750365257, + "loss_iou": 0.287109375, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 411558308, + "step": 4244 + }, + { + "epoch": 0.4150371529135706, + "grad_norm": 4.785925922251022, + "learning_rate": 5e-05, + "loss": 0.0573, + "num_input_tokens_seen": 411655804, + "step": 4245 + }, + { + "epoch": 0.4150371529135706, + "loss": 0.056973833590745926, + "loss_ce": 0.005475417710840702, + "loss_iou": 0.263671875, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 411655804, + "step": 4245 + }, + { + "epoch": 0.4151349237387564, + "grad_norm": 14.20991331340977, + "learning_rate": 5e-05, + "loss": 0.1036, + "num_input_tokens_seen": 411752664, + "step": 4246 + }, + { + "epoch": 0.4151349237387564, + "loss": 0.12237916886806488, + "loss_ce": 0.003818378783762455, + "loss_iou": 0.3125, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 411752664, + "step": 4246 + }, + { + "epoch": 0.41523269456394213, + "grad_norm": 3.284848327432815, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 411848948, + "step": 4247 + }, + { + "epoch": 0.41523269456394213, + "loss": 0.06521324813365936, + "loss_ce": 0.006779718212783337, + "loss_iou": 0.39453125, + "loss_num": 0.01171875, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 411848948, + "step": 4247 + }, + { + "epoch": 0.4153304653891279, + "grad_norm": 35.27276476488619, + "learning_rate": 5e-05, + "loss": 0.1074, + "num_input_tokens_seen": 411945828, + "step": 4248 + }, + { + "epoch": 0.4153304653891279, + "loss": 0.0716530978679657, + "loss_ce": 0.011106220073997974, + "loss_iou": 0.2490234375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 411945828, + "step": 4248 + }, + { + "epoch": 0.41542823621431363, + "grad_norm": 21.592593803472962, + "learning_rate": 5e-05, + "loss": 0.0971, + "num_input_tokens_seen": 412042264, + "step": 4249 + }, + { + "epoch": 0.41542823621431363, + "loss": 0.11417605727910995, + "loss_ce": 0.004221217706799507, + "loss_iou": 0.2080078125, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 412042264, + "step": 4249 + }, + { + "epoch": 0.41552600703949943, + "grad_norm": 2.320419665716721, + "learning_rate": 5e-05, + "loss": 0.1078, + "num_input_tokens_seen": 412139856, + "step": 4250 + }, + { + "epoch": 0.41552600703949943, + "eval_seeclick_CIoU": 0.4590296298265457, + "eval_seeclick_GIoU": 0.4724835753440857, + "eval_seeclick_IoU": 0.5057820528745651, + "eval_seeclick_MAE_all": 0.07551977038383484, + "eval_seeclick_MAE_h": 0.037199197337031364, + "eval_seeclick_MAE_w": 0.10838687792420387, + "eval_seeclick_MAE_x": 0.11964447796344757, + "eval_seeclick_MAE_y": 0.0368485189974308, + "eval_seeclick_NUM_probability": 0.9999954104423523, + "eval_seeclick_inside_bbox": 0.7073863744735718, + "eval_seeclick_loss": 0.2619836926460266, + "eval_seeclick_loss_ce": 0.010309399105608463, + "eval_seeclick_loss_iou": 0.484375, + "eval_seeclick_loss_num": 0.0501861572265625, + "eval_seeclick_loss_xval": 0.251129150390625, + "eval_seeclick_runtime": 75.5995, + "eval_seeclick_samples_per_second": 0.569, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 412139856, + "step": 4250 + }, + { + "epoch": 0.41552600703949943, + "eval_icons_CIoU": 0.7086760997772217, + "eval_icons_GIoU": 0.702379047870636, + "eval_icons_IoU": 0.7354439198970795, + "eval_icons_MAE_all": 0.05435138754546642, + "eval_icons_MAE_h": 0.06390436179935932, + "eval_icons_MAE_w": 0.046009816229343414, + "eval_icons_MAE_x": 0.04585954733192921, + "eval_icons_MAE_y": 0.0616318192332983, + "eval_icons_NUM_probability": 0.9999807476997375, + "eval_icons_inside_bbox": 0.8350694477558136, + "eval_icons_loss": 0.16758300364017487, + "eval_icons_loss_ce": 1.4165837001200998e-05, + "eval_icons_loss_iou": 0.39678955078125, + "eval_icons_loss_num": 0.033985137939453125, + "eval_icons_loss_xval": 0.170013427734375, + "eval_icons_runtime": 86.1657, + "eval_icons_samples_per_second": 0.58, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 412139856, + "step": 4250 + }, + { + "epoch": 0.41552600703949943, + "eval_screenspot_CIoU": 0.3340082863966624, + "eval_screenspot_GIoU": 0.30472201108932495, + "eval_screenspot_IoU": 0.42485886812210083, + "eval_screenspot_MAE_all": 0.16019215186436972, + "eval_screenspot_MAE_h": 0.11955755949020386, + "eval_screenspot_MAE_w": 0.21380160997311273, + "eval_screenspot_MAE_x": 0.18012859175602594, + "eval_screenspot_MAE_y": 0.12728085865577063, + "eval_screenspot_NUM_probability": 0.9999789794286092, + "eval_screenspot_inside_bbox": 0.6775000095367432, + "eval_screenspot_loss": 0.5579949021339417, + "eval_screenspot_loss_ce": 0.023729073504606884, + "eval_screenspot_loss_iou": 0.3687744140625, + "eval_screenspot_loss_num": 0.10702006022135417, + "eval_screenspot_loss_xval": 0.5349934895833334, + "eval_screenspot_runtime": 159.3903, + "eval_screenspot_samples_per_second": 0.558, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 412139856, + "step": 4250 + }, + { + "epoch": 0.41552600703949943, + "eval_compot_CIoU": 0.42728887498378754, + "eval_compot_GIoU": 0.40612514317035675, + "eval_compot_IoU": 0.5107555538415909, + "eval_compot_MAE_all": 0.11321975663304329, + "eval_compot_MAE_h": 0.09040544927120209, + "eval_compot_MAE_w": 0.13793329149484634, + "eval_compot_MAE_x": 0.13468825072050095, + "eval_compot_MAE_y": 0.08985205739736557, + "eval_compot_NUM_probability": 0.9999822974205017, + "eval_compot_inside_bbox": 0.6927083432674408, + "eval_compot_loss": 0.3317519724369049, + "eval_compot_loss_ce": 0.012884165160357952, + "eval_compot_loss_iou": 0.451416015625, + "eval_compot_loss_num": 0.05474090576171875, + "eval_compot_loss_xval": 0.2738189697265625, + "eval_compot_runtime": 91.0147, + "eval_compot_samples_per_second": 0.549, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 412139856, + "step": 4250 + }, + { + "epoch": 0.41552600703949943, + "loss": 0.2634075880050659, + "loss_ce": 0.011179810389876366, + "loss_iou": 0.462890625, + "loss_num": 0.050537109375, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 412139856, + "step": 4250 + }, + { + "epoch": 0.4156237778646852, + "grad_norm": 8.187757280079131, + "learning_rate": 5e-05, + "loss": 0.0834, + "num_input_tokens_seen": 412237224, + "step": 4251 + }, + { + "epoch": 0.4156237778646852, + "loss": 0.09930436313152313, + "loss_ce": 0.006012132856994867, + "loss_iou": 0.244140625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 412237224, + "step": 4251 + }, + { + "epoch": 0.41572154868987093, + "grad_norm": 1.6988870640684712, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 412333828, + "step": 4252 + }, + { + "epoch": 0.41572154868987093, + "loss": 0.06686349213123322, + "loss_ce": 0.006713345646858215, + "loss_iou": 0.302734375, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 412333828, + "step": 4252 + }, + { + "epoch": 0.4158193195150567, + "grad_norm": 10.378034692175413, + "learning_rate": 5e-05, + "loss": 0.1479, + "num_input_tokens_seen": 412430636, + "step": 4253 + }, + { + "epoch": 0.4158193195150567, + "loss": 0.16831794381141663, + "loss_ce": 0.008848346769809723, + "loss_iou": 0.279296875, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 412430636, + "step": 4253 + }, + { + "epoch": 0.4159170903402425, + "grad_norm": 9.872536812052793, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 412527424, + "step": 4254 + }, + { + "epoch": 0.4159170903402425, + "loss": 0.03489897772669792, + "loss_ce": 0.002893670229241252, + "loss_iou": 0.251953125, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 412527424, + "step": 4254 + }, + { + "epoch": 0.41601486116542824, + "grad_norm": 1.8377367425797315, + "learning_rate": 5e-05, + "loss": 0.0934, + "num_input_tokens_seen": 412625220, + "step": 4255 + }, + { + "epoch": 0.41601486116542824, + "loss": 0.09984230995178223, + "loss_ce": 0.004963153973221779, + "loss_iou": 0.318359375, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 412625220, + "step": 4255 + }, + { + "epoch": 0.416112631990614, + "grad_norm": 2.9078146050273657, + "learning_rate": 5e-05, + "loss": 0.0875, + "num_input_tokens_seen": 412722504, + "step": 4256 + }, + { + "epoch": 0.416112631990614, + "loss": 0.09563596546649933, + "loss_ce": 0.0029235598631203175, + "loss_iou": 0.28515625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 412722504, + "step": 4256 + }, + { + "epoch": 0.41621040281579974, + "grad_norm": 5.381608779528225, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 412819348, + "step": 4257 + }, + { + "epoch": 0.41621040281579974, + "loss": 0.06489808857440948, + "loss_ce": 0.006136494688689709, + "loss_iou": 0.296875, + "loss_num": 0.01171875, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 412819348, + "step": 4257 + }, + { + "epoch": 0.41630817364098555, + "grad_norm": 4.205133852741249, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 412915888, + "step": 4258 + }, + { + "epoch": 0.41630817364098555, + "loss": 0.03949705511331558, + "loss_ce": 0.004607837181538343, + "loss_iou": 0.28125, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 412915888, + "step": 4258 + }, + { + "epoch": 0.4164059444661713, + "grad_norm": 14.15876536307572, + "learning_rate": 5e-05, + "loss": 0.1252, + "num_input_tokens_seen": 413012488, + "step": 4259 + }, + { + "epoch": 0.4164059444661713, + "loss": 0.1826799362897873, + "loss_ce": 0.010011482983827591, + "loss_iou": 0.30859375, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 413012488, + "step": 4259 + }, + { + "epoch": 0.41650371529135705, + "grad_norm": 34.55240566353234, + "learning_rate": 5e-05, + "loss": 0.0923, + "num_input_tokens_seen": 413109300, + "step": 4260 + }, + { + "epoch": 0.41650371529135705, + "loss": 0.08601166307926178, + "loss_ce": 0.005216378718614578, + "loss_iou": 0.349609375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 413109300, + "step": 4260 + }, + { + "epoch": 0.4166014861165428, + "grad_norm": 7.760802001907089, + "learning_rate": 5e-05, + "loss": 0.0851, + "num_input_tokens_seen": 413206476, + "step": 4261 + }, + { + "epoch": 0.4166014861165428, + "loss": 0.05843767523765564, + "loss_ce": 0.003918025176972151, + "loss_iou": 0.462890625, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 413206476, + "step": 4261 + }, + { + "epoch": 0.4166992569417286, + "grad_norm": 4.470893067221145, + "learning_rate": 5e-05, + "loss": 0.0988, + "num_input_tokens_seen": 413303436, + "step": 4262 + }, + { + "epoch": 0.4166992569417286, + "loss": 0.08426249772310257, + "loss_ce": 0.004199631977826357, + "loss_iou": 0.35546875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 413303436, + "step": 4262 + }, + { + "epoch": 0.41679702776691435, + "grad_norm": 6.784013347872748, + "learning_rate": 5e-05, + "loss": 0.0558, + "num_input_tokens_seen": 413400004, + "step": 4263 + }, + { + "epoch": 0.41679702776691435, + "loss": 0.057760175317525864, + "loss_ce": 0.006189284846186638, + "loss_iou": 0.2734375, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 413400004, + "step": 4263 + }, + { + "epoch": 0.4168947985921001, + "grad_norm": 3.7378420279494184, + "learning_rate": 5e-05, + "loss": 0.1216, + "num_input_tokens_seen": 413496816, + "step": 4264 + }, + { + "epoch": 0.4168947985921001, + "loss": 0.12428788840770721, + "loss_ce": 0.0037739716935902834, + "loss_iou": 0.2734375, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 413496816, + "step": 4264 + }, + { + "epoch": 0.4169925694172859, + "grad_norm": 2.6237602833916336, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 413593824, + "step": 4265 + }, + { + "epoch": 0.4169925694172859, + "loss": 0.07650597393512726, + "loss_ce": 0.00704796239733696, + "loss_iou": 0.302734375, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 413593824, + "step": 4265 + }, + { + "epoch": 0.41709034024247166, + "grad_norm": 4.82079483425088, + "learning_rate": 5e-05, + "loss": 0.0769, + "num_input_tokens_seen": 413691208, + "step": 4266 + }, + { + "epoch": 0.41709034024247166, + "loss": 0.07085387408733368, + "loss_ce": 0.0022656191140413284, + "loss_iou": 0.328125, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 413691208, + "step": 4266 + }, + { + "epoch": 0.4171881110676574, + "grad_norm": 9.324033765873766, + "learning_rate": 5e-05, + "loss": 0.1058, + "num_input_tokens_seen": 413788760, + "step": 4267 + }, + { + "epoch": 0.4171881110676574, + "loss": 0.12080156058073044, + "loss_ce": 0.0046364008449018, + "loss_iou": 0.447265625, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 413788760, + "step": 4267 + }, + { + "epoch": 0.41728588189284316, + "grad_norm": 7.952948903895642, + "learning_rate": 5e-05, + "loss": 0.1239, + "num_input_tokens_seen": 413885388, + "step": 4268 + }, + { + "epoch": 0.41728588189284316, + "loss": 0.07847383618354797, + "loss_ce": 0.003003867343068123, + "loss_iou": 0.380859375, + "loss_num": 0.01507568359375, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 413885388, + "step": 4268 + }, + { + "epoch": 0.41738365271802896, + "grad_norm": 3.548268280861382, + "learning_rate": 5e-05, + "loss": 0.0547, + "num_input_tokens_seen": 413980956, + "step": 4269 + }, + { + "epoch": 0.41738365271802896, + "loss": 0.06619562208652496, + "loss_ce": 0.005465643014758825, + "loss_iou": 0.158203125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 413980956, + "step": 4269 + }, + { + "epoch": 0.4174814235432147, + "grad_norm": 12.515009393500009, + "learning_rate": 5e-05, + "loss": 0.1014, + "num_input_tokens_seen": 414077800, + "step": 4270 + }, + { + "epoch": 0.4174814235432147, + "loss": 0.10460864752531052, + "loss_ce": 0.007318610791116953, + "loss_iou": 0.212890625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 414077800, + "step": 4270 + }, + { + "epoch": 0.41757919436840046, + "grad_norm": 6.828574315506417, + "learning_rate": 5e-05, + "loss": 0.123, + "num_input_tokens_seen": 414175052, + "step": 4271 + }, + { + "epoch": 0.41757919436840046, + "loss": 0.08232274651527405, + "loss_ce": 0.0032364395447075367, + "loss_iou": 0.427734375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 414175052, + "step": 4271 + }, + { + "epoch": 0.4176769651935862, + "grad_norm": 4.921085338456426, + "learning_rate": 5e-05, + "loss": 0.1118, + "num_input_tokens_seen": 414271992, + "step": 4272 + }, + { + "epoch": 0.4176769651935862, + "loss": 0.0914149060845375, + "loss_ce": 0.013900256715714931, + "loss_iou": 0.3203125, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 414271992, + "step": 4272 + }, + { + "epoch": 0.417774736018772, + "grad_norm": 15.27738642293645, + "learning_rate": 5e-05, + "loss": 0.1056, + "num_input_tokens_seen": 414369004, + "step": 4273 + }, + { + "epoch": 0.417774736018772, + "loss": 0.09625614434480667, + "loss_ce": 0.0051306551322340965, + "loss_iou": 0.330078125, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 414369004, + "step": 4273 + }, + { + "epoch": 0.41787250684395777, + "grad_norm": 3.9022532152182543, + "learning_rate": 5e-05, + "loss": 0.0858, + "num_input_tokens_seen": 414466008, + "step": 4274 + }, + { + "epoch": 0.41787250684395777, + "loss": 0.0868561714887619, + "loss_ce": 0.007998748682439327, + "loss_iou": 0.333984375, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 414466008, + "step": 4274 + }, + { + "epoch": 0.4179702776691435, + "grad_norm": 10.14021065528701, + "learning_rate": 5e-05, + "loss": 0.0631, + "num_input_tokens_seen": 414562544, + "step": 4275 + }, + { + "epoch": 0.4179702776691435, + "loss": 0.06926440447568893, + "loss_ce": 0.0051622288301587105, + "loss_iou": 0.2197265625, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 414562544, + "step": 4275 + }, + { + "epoch": 0.41806804849432927, + "grad_norm": 20.111865910505472, + "learning_rate": 5e-05, + "loss": 0.0691, + "num_input_tokens_seen": 414660772, + "step": 4276 + }, + { + "epoch": 0.41806804849432927, + "loss": 0.07978422939777374, + "loss_ce": 0.005405264440923929, + "loss_iou": 0.32421875, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 414660772, + "step": 4276 + }, + { + "epoch": 0.4181658193195151, + "grad_norm": 9.562574837089278, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 414757672, + "step": 4277 + }, + { + "epoch": 0.4181658193195151, + "loss": 0.052386574447155, + "loss_ce": 0.0023072250187397003, + "loss_iou": 0.37890625, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 414757672, + "step": 4277 + }, + { + "epoch": 0.4182635901447008, + "grad_norm": 12.592240793439466, + "learning_rate": 5e-05, + "loss": 0.0638, + "num_input_tokens_seen": 414855216, + "step": 4278 + }, + { + "epoch": 0.4182635901447008, + "loss": 0.053787630051374435, + "loss_ce": 0.0012973974226042628, + "loss_iou": 0.361328125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 414855216, + "step": 4278 + }, + { + "epoch": 0.4183613609698866, + "grad_norm": 9.225069577493887, + "learning_rate": 5e-05, + "loss": 0.1068, + "num_input_tokens_seen": 414951376, + "step": 4279 + }, + { + "epoch": 0.4183613609698866, + "loss": 0.11839228868484497, + "loss_ce": 0.004218406975269318, + "loss_iou": 0.294921875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 414951376, + "step": 4279 + }, + { + "epoch": 0.4184591317950723, + "grad_norm": 54.611855608716084, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 415047596, + "step": 4280 + }, + { + "epoch": 0.4184591317950723, + "loss": 0.08927752822637558, + "loss_ce": 0.0036409113090485334, + "loss_iou": 0.302734375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 415047596, + "step": 4280 + }, + { + "epoch": 0.41855690262025813, + "grad_norm": 10.605159273214605, + "learning_rate": 5e-05, + "loss": 0.0705, + "num_input_tokens_seen": 415143624, + "step": 4281 + }, + { + "epoch": 0.41855690262025813, + "loss": 0.07328985631465912, + "loss_ce": 0.007188788149505854, + "loss_iou": 0.34765625, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 415143624, + "step": 4281 + }, + { + "epoch": 0.4186546734454439, + "grad_norm": 2.530788318981361, + "learning_rate": 5e-05, + "loss": 0.047, + "num_input_tokens_seen": 415240552, + "step": 4282 + }, + { + "epoch": 0.4186546734454439, + "loss": 0.06066248565912247, + "loss_ce": 0.006364083848893642, + "loss_iou": 0.31640625, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 415240552, + "step": 4282 + }, + { + "epoch": 0.41875244427062963, + "grad_norm": 18.611067947557515, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 415337856, + "step": 4283 + }, + { + "epoch": 0.41875244427062963, + "loss": 0.10765254497528076, + "loss_ce": 0.0032976858783513308, + "loss_iou": 0.361328125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 415337856, + "step": 4283 + }, + { + "epoch": 0.4188502150958154, + "grad_norm": 5.0731305512803155, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 415435640, + "step": 4284 + }, + { + "epoch": 0.4188502150958154, + "loss": 0.10612857341766357, + "loss_ce": 0.005252717062830925, + "loss_iou": 0.328125, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 415435640, + "step": 4284 + }, + { + "epoch": 0.4189479859210012, + "grad_norm": 4.060857376322205, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 415533132, + "step": 4285 + }, + { + "epoch": 0.4189479859210012, + "loss": 0.0665786862373352, + "loss_ce": 0.0018203905783593655, + "loss_iou": 0.302734375, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 415533132, + "step": 4285 + }, + { + "epoch": 0.41904575674618694, + "grad_norm": 3.5285583599397934, + "learning_rate": 5e-05, + "loss": 0.1047, + "num_input_tokens_seen": 415630408, + "step": 4286 + }, + { + "epoch": 0.41904575674618694, + "loss": 0.08508911728858948, + "loss_ce": 0.004431154578924179, + "loss_iou": 0.263671875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 415630408, + "step": 4286 + }, + { + "epoch": 0.4191435275713727, + "grad_norm": 7.710045751202363, + "learning_rate": 5e-05, + "loss": 0.0954, + "num_input_tokens_seen": 415727280, + "step": 4287 + }, + { + "epoch": 0.4191435275713727, + "loss": 0.10897958278656006, + "loss_ce": 0.0010846799705177546, + "loss_iou": 0.248046875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 415727280, + "step": 4287 + }, + { + "epoch": 0.4192412983965585, + "grad_norm": 7.095101530030844, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 415823420, + "step": 4288 + }, + { + "epoch": 0.4192412983965585, + "loss": 0.08646178990602493, + "loss_ce": 0.008321528322994709, + "loss_iou": 0.2275390625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 415823420, + "step": 4288 + }, + { + "epoch": 0.41933906922174424, + "grad_norm": 2.89923558589504, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 415919032, + "step": 4289 + }, + { + "epoch": 0.41933906922174424, + "loss": 0.10054874420166016, + "loss_ce": 0.006249435245990753, + "loss_iou": 0.1494140625, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 415919032, + "step": 4289 + }, + { + "epoch": 0.41943684004693, + "grad_norm": 11.862952424007329, + "learning_rate": 5e-05, + "loss": 0.0843, + "num_input_tokens_seen": 416015996, + "step": 4290 + }, + { + "epoch": 0.41943684004693, + "loss": 0.09191210567951202, + "loss_ce": 0.008034544065594673, + "loss_iou": 0.22265625, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 416015996, + "step": 4290 + }, + { + "epoch": 0.41953461087211574, + "grad_norm": 7.602904796886037, + "learning_rate": 5e-05, + "loss": 0.119, + "num_input_tokens_seen": 416113564, + "step": 4291 + }, + { + "epoch": 0.41953461087211574, + "loss": 0.0945529192686081, + "loss_ce": 0.009111334569752216, + "loss_iou": 0.396484375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 416113564, + "step": 4291 + }, + { + "epoch": 0.41963238169730155, + "grad_norm": 8.076787436181146, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 416209440, + "step": 4292 + }, + { + "epoch": 0.41963238169730155, + "loss": 0.08857360482215881, + "loss_ce": 0.004680788144469261, + "loss_iou": 0.34765625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 416209440, + "step": 4292 + }, + { + "epoch": 0.4197301525224873, + "grad_norm": 3.777294343362725, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 416305980, + "step": 4293 + }, + { + "epoch": 0.4197301525224873, + "loss": 0.09939737617969513, + "loss_ce": 0.01043863594532013, + "loss_iou": 0.384765625, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 416305980, + "step": 4293 + }, + { + "epoch": 0.41982792334767305, + "grad_norm": 7.499216931323039, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 416403572, + "step": 4294 + }, + { + "epoch": 0.41982792334767305, + "loss": 0.06562608480453491, + "loss_ce": 0.0021037422120571136, + "loss_iou": 0.326171875, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 416403572, + "step": 4294 + }, + { + "epoch": 0.4199256941728588, + "grad_norm": 6.71844062866931, + "learning_rate": 5e-05, + "loss": 0.1005, + "num_input_tokens_seen": 416501296, + "step": 4295 + }, + { + "epoch": 0.4199256941728588, + "loss": 0.12975051999092102, + "loss_ce": 0.003651888808235526, + "loss_iou": 0.30859375, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 416501296, + "step": 4295 + }, + { + "epoch": 0.4200234649980446, + "grad_norm": 40.25313013016889, + "learning_rate": 5e-05, + "loss": 0.0971, + "num_input_tokens_seen": 416598024, + "step": 4296 + }, + { + "epoch": 0.4200234649980446, + "loss": 0.09111903607845306, + "loss_ce": 0.005578269250690937, + "loss_iou": 0.3515625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 416598024, + "step": 4296 + }, + { + "epoch": 0.42012123582323035, + "grad_norm": 3.421616053069752, + "learning_rate": 5e-05, + "loss": 0.0858, + "num_input_tokens_seen": 416694812, + "step": 4297 + }, + { + "epoch": 0.42012123582323035, + "loss": 0.10729488730430603, + "loss_ce": 0.0076855141669511795, + "loss_iou": 0.326171875, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 416694812, + "step": 4297 + }, + { + "epoch": 0.4202190066484161, + "grad_norm": 32.72491680418796, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 416791640, + "step": 4298 + }, + { + "epoch": 0.4202190066484161, + "loss": 0.07365784794092178, + "loss_ce": 0.0025518948677927256, + "loss_iou": 0.419921875, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 416791640, + "step": 4298 + }, + { + "epoch": 0.42031677747360185, + "grad_norm": 8.56556928886018, + "learning_rate": 5e-05, + "loss": 0.0918, + "num_input_tokens_seen": 416888744, + "step": 4299 + }, + { + "epoch": 0.42031677747360185, + "loss": 0.07574808597564697, + "loss_ce": 0.0029331438709050417, + "loss_iou": 0.416015625, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 416888744, + "step": 4299 + }, + { + "epoch": 0.42041454829878766, + "grad_norm": 8.4278919335522, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 416986260, + "step": 4300 + }, + { + "epoch": 0.42041454829878766, + "loss": 0.09458137303590775, + "loss_ce": 0.009288551285862923, + "loss_iou": 0.291015625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 416986260, + "step": 4300 + }, + { + "epoch": 0.4205123191239734, + "grad_norm": 4.613472982707004, + "learning_rate": 5e-05, + "loss": 0.0969, + "num_input_tokens_seen": 417083120, + "step": 4301 + }, + { + "epoch": 0.4205123191239734, + "loss": 0.10137931257486343, + "loss_ce": 0.006896897219121456, + "loss_iou": 0.439453125, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 417083120, + "step": 4301 + }, + { + "epoch": 0.42061008994915916, + "grad_norm": 10.37346246777083, + "learning_rate": 5e-05, + "loss": 0.0942, + "num_input_tokens_seen": 417179688, + "step": 4302 + }, + { + "epoch": 0.42061008994915916, + "loss": 0.10751733183860779, + "loss_ce": 0.0034829103387892246, + "loss_iou": 0.28515625, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 417179688, + "step": 4302 + }, + { + "epoch": 0.4207078607743449, + "grad_norm": 12.589223420599328, + "learning_rate": 5e-05, + "loss": 0.0462, + "num_input_tokens_seen": 417275984, + "step": 4303 + }, + { + "epoch": 0.4207078607743449, + "loss": 0.0507814921438694, + "loss_ce": 0.004471069201827049, + "loss_iou": 0.271484375, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 417275984, + "step": 4303 + }, + { + "epoch": 0.4208056315995307, + "grad_norm": 9.8988051036048, + "learning_rate": 5e-05, + "loss": 0.1134, + "num_input_tokens_seen": 417373060, + "step": 4304 + }, + { + "epoch": 0.4208056315995307, + "loss": 0.14930880069732666, + "loss_ce": 0.007417331915348768, + "loss_iou": 0.296875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 417373060, + "step": 4304 + }, + { + "epoch": 0.42090340242471647, + "grad_norm": 6.768269460429765, + "learning_rate": 5e-05, + "loss": 0.1046, + "num_input_tokens_seen": 417470684, + "step": 4305 + }, + { + "epoch": 0.42090340242471647, + "loss": 0.12375178933143616, + "loss_ce": 0.006564283277839422, + "loss_iou": 0.38671875, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 417470684, + "step": 4305 + }, + { + "epoch": 0.4210011732499022, + "grad_norm": 8.810656521725132, + "learning_rate": 5e-05, + "loss": 0.0969, + "num_input_tokens_seen": 417568276, + "step": 4306 + }, + { + "epoch": 0.4210011732499022, + "loss": 0.12274675816297531, + "loss_ce": 0.006444264203310013, + "loss_iou": 0.404296875, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 417568276, + "step": 4306 + }, + { + "epoch": 0.42109894407508797, + "grad_norm": 19.540033417926196, + "learning_rate": 5e-05, + "loss": 0.1088, + "num_input_tokens_seen": 417665488, + "step": 4307 + }, + { + "epoch": 0.42109894407508797, + "loss": 0.15056198835372925, + "loss_ce": 0.006610577926039696, + "loss_iou": 0.318359375, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 417665488, + "step": 4307 + }, + { + "epoch": 0.42119671490027377, + "grad_norm": 13.76339063627864, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 417763524, + "step": 4308 + }, + { + "epoch": 0.42119671490027377, + "loss": 0.06173840910196304, + "loss_ce": 0.004884161986410618, + "loss_iou": 0.357421875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 417763524, + "step": 4308 + }, + { + "epoch": 0.4212944857254595, + "grad_norm": 11.700638713690239, + "learning_rate": 5e-05, + "loss": 0.063, + "num_input_tokens_seen": 417860892, + "step": 4309 + }, + { + "epoch": 0.4212944857254595, + "loss": 0.04563295096158981, + "loss_ce": 0.006116505712270737, + "loss_iou": 0.310546875, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 417860892, + "step": 4309 + }, + { + "epoch": 0.42139225655064527, + "grad_norm": 84.31632892287469, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 417957200, + "step": 4310 + }, + { + "epoch": 0.42139225655064527, + "loss": 0.07201973348855972, + "loss_ce": 0.004713216796517372, + "loss_iou": 0.291015625, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 417957200, + "step": 4310 + }, + { + "epoch": 0.4214900273758311, + "grad_norm": 12.896531948777929, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 418053672, + "step": 4311 + }, + { + "epoch": 0.4214900273758311, + "loss": 0.06787557899951935, + "loss_ce": 0.004261684603989124, + "loss_iou": 0.26953125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 418053672, + "step": 4311 + }, + { + "epoch": 0.42158779820101683, + "grad_norm": 10.252343464120777, + "learning_rate": 5e-05, + "loss": 0.105, + "num_input_tokens_seen": 418149880, + "step": 4312 + }, + { + "epoch": 0.42158779820101683, + "loss": 0.11387789249420166, + "loss_ce": 0.01222383975982666, + "loss_iou": 0.275390625, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 418149880, + "step": 4312 + }, + { + "epoch": 0.4216855690262026, + "grad_norm": 8.64606561478231, + "learning_rate": 5e-05, + "loss": 0.114, + "num_input_tokens_seen": 418246552, + "step": 4313 + }, + { + "epoch": 0.4216855690262026, + "loss": 0.10917964577674866, + "loss_ce": 0.0061828200705349445, + "loss_iou": 0.2451171875, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 418246552, + "step": 4313 + }, + { + "epoch": 0.42178333985138833, + "grad_norm": 5.547739585103074, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 418343308, + "step": 4314 + }, + { + "epoch": 0.42178333985138833, + "loss": 0.08930694311857224, + "loss_ce": 0.005169981624931097, + "loss_iou": 0.228515625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 418343308, + "step": 4314 + }, + { + "epoch": 0.42188111067657413, + "grad_norm": 3.8422737776094067, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 418440124, + "step": 4315 + }, + { + "epoch": 0.42188111067657413, + "loss": 0.0952659547328949, + "loss_ce": 0.0045524584129452705, + "loss_iou": 0.3203125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 418440124, + "step": 4315 + }, + { + "epoch": 0.4219788815017599, + "grad_norm": 4.758544242581986, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 418536460, + "step": 4316 + }, + { + "epoch": 0.4219788815017599, + "loss": 0.0649895966053009, + "loss_ce": 0.005487946327775717, + "loss_iou": 0.212890625, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 418536460, + "step": 4316 + }, + { + "epoch": 0.42207665232694563, + "grad_norm": 9.393044103110142, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 418633244, + "step": 4317 + }, + { + "epoch": 0.42207665232694563, + "loss": 0.11147628724575043, + "loss_ce": 0.00953612755984068, + "loss_iou": 0.271484375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 418633244, + "step": 4317 + }, + { + "epoch": 0.4221744231521314, + "grad_norm": 9.79899543051706, + "learning_rate": 5e-05, + "loss": 0.1084, + "num_input_tokens_seen": 418729596, + "step": 4318 + }, + { + "epoch": 0.4221744231521314, + "loss": 0.0900828167796135, + "loss_ce": 0.0036532199010252953, + "loss_iou": 0.345703125, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 418729596, + "step": 4318 + }, + { + "epoch": 0.4222721939773172, + "grad_norm": 3.967325470106453, + "learning_rate": 5e-05, + "loss": 0.0528, + "num_input_tokens_seen": 418825816, + "step": 4319 + }, + { + "epoch": 0.4222721939773172, + "loss": 0.05049945414066315, + "loss_ce": 0.006077302619814873, + "loss_iou": 0.27734375, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 418825816, + "step": 4319 + }, + { + "epoch": 0.42236996480250294, + "grad_norm": 3.5077005416220404, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 418922868, + "step": 4320 + }, + { + "epoch": 0.42236996480250294, + "loss": 0.07209387421607971, + "loss_ce": 0.0033072480000555515, + "loss_iou": 0.283203125, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 418922868, + "step": 4320 + }, + { + "epoch": 0.4224677356276887, + "grad_norm": 6.676531445662879, + "learning_rate": 5e-05, + "loss": 0.097, + "num_input_tokens_seen": 419020416, + "step": 4321 + }, + { + "epoch": 0.4224677356276887, + "loss": 0.10004517436027527, + "loss_ce": 0.00420471653342247, + "loss_iou": 0.31640625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 419020416, + "step": 4321 + }, + { + "epoch": 0.42256550645287444, + "grad_norm": 7.253208616420208, + "learning_rate": 5e-05, + "loss": 0.1112, + "num_input_tokens_seen": 419117608, + "step": 4322 + }, + { + "epoch": 0.42256550645287444, + "loss": 0.08158835768699646, + "loss_ce": 0.0059352838434278965, + "loss_iou": 0.431640625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 419117608, + "step": 4322 + }, + { + "epoch": 0.42266327727806025, + "grad_norm": 2.859683279282056, + "learning_rate": 5e-05, + "loss": 0.0492, + "num_input_tokens_seen": 419213504, + "step": 4323 + }, + { + "epoch": 0.42266327727806025, + "loss": 0.026516897603869438, + "loss_ce": 0.002020817482843995, + "loss_iou": 0.228515625, + "loss_num": 0.004913330078125, + "loss_xval": 0.0245361328125, + "num_input_tokens_seen": 419213504, + "step": 4323 + }, + { + "epoch": 0.422761048103246, + "grad_norm": 12.127635490283698, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 419311904, + "step": 4324 + }, + { + "epoch": 0.422761048103246, + "loss": 0.06432611495256424, + "loss_ce": 0.0022686191368848085, + "loss_iou": 0.35546875, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 419311904, + "step": 4324 + }, + { + "epoch": 0.42285881892843175, + "grad_norm": 9.140247886696446, + "learning_rate": 5e-05, + "loss": 0.0867, + "num_input_tokens_seen": 419409356, + "step": 4325 + }, + { + "epoch": 0.42285881892843175, + "loss": 0.1034616008400917, + "loss_ce": 0.0065530287101864815, + "loss_iou": 0.40625, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 419409356, + "step": 4325 + }, + { + "epoch": 0.4229565897536175, + "grad_norm": 18.833825213202307, + "learning_rate": 5e-05, + "loss": 0.0718, + "num_input_tokens_seen": 419505972, + "step": 4326 + }, + { + "epoch": 0.4229565897536175, + "loss": 0.06911350041627884, + "loss_ce": 0.002730138599872589, + "loss_iou": 0.298828125, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 419505972, + "step": 4326 + }, + { + "epoch": 0.4230543605788033, + "grad_norm": 11.803147907878838, + "learning_rate": 5e-05, + "loss": 0.1067, + "num_input_tokens_seen": 419602804, + "step": 4327 + }, + { + "epoch": 0.4230543605788033, + "loss": 0.1519445776939392, + "loss_ce": 0.00994627084583044, + "loss_iou": 0.33203125, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 419602804, + "step": 4327 + }, + { + "epoch": 0.42315213140398905, + "grad_norm": 6.8897754731020875, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 419700512, + "step": 4328 + }, + { + "epoch": 0.42315213140398905, + "loss": 0.10936536639928818, + "loss_ce": 0.008840465918183327, + "loss_iou": 0.3671875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 419700512, + "step": 4328 + }, + { + "epoch": 0.4232499022291748, + "grad_norm": 8.008841944124967, + "learning_rate": 5e-05, + "loss": 0.096, + "num_input_tokens_seen": 419796016, + "step": 4329 + }, + { + "epoch": 0.4232499022291748, + "loss": 0.07942621409893036, + "loss_ce": 0.0036815849598497152, + "loss_iou": 0.32421875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 419796016, + "step": 4329 + }, + { + "epoch": 0.42334767305436055, + "grad_norm": 29.460666505128412, + "learning_rate": 5e-05, + "loss": 0.0675, + "num_input_tokens_seen": 419893960, + "step": 4330 + }, + { + "epoch": 0.42334767305436055, + "loss": 0.06558933854103088, + "loss_ce": 0.008506214246153831, + "loss_iou": 0.2431640625, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 419893960, + "step": 4330 + }, + { + "epoch": 0.42344544387954636, + "grad_norm": 16.679879896875587, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 419991800, + "step": 4331 + }, + { + "epoch": 0.42344544387954636, + "loss": 0.11510870605707169, + "loss_ce": 0.005764215253293514, + "loss_iou": 0.39453125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 419991800, + "step": 4331 + }, + { + "epoch": 0.4235432147047321, + "grad_norm": 9.427671108130923, + "learning_rate": 5e-05, + "loss": 0.0906, + "num_input_tokens_seen": 420088652, + "step": 4332 + }, + { + "epoch": 0.4235432147047321, + "loss": 0.08848243951797485, + "loss_ce": 0.0040708198212087154, + "loss_iou": 0.33984375, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 420088652, + "step": 4332 + }, + { + "epoch": 0.42364098552991786, + "grad_norm": 7.237897629870661, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 420185532, + "step": 4333 + }, + { + "epoch": 0.42364098552991786, + "loss": 0.06502285599708557, + "loss_ce": 0.0023855259642004967, + "loss_iou": 0.267578125, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 420185532, + "step": 4333 + }, + { + "epoch": 0.42373875635510366, + "grad_norm": 9.635979777807119, + "learning_rate": 5e-05, + "loss": 0.0688, + "num_input_tokens_seen": 420282396, + "step": 4334 + }, + { + "epoch": 0.42373875635510366, + "loss": 0.06224864721298218, + "loss_ce": 0.010658683255314827, + "loss_iou": 0.302734375, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 420282396, + "step": 4334 + }, + { + "epoch": 0.4238365271802894, + "grad_norm": 19.625084977436327, + "learning_rate": 5e-05, + "loss": 0.105, + "num_input_tokens_seen": 420379188, + "step": 4335 + }, + { + "epoch": 0.4238365271802894, + "loss": 0.0732693076133728, + "loss_ce": 0.0030788779258728027, + "loss_iou": 0.359375, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 420379188, + "step": 4335 + }, + { + "epoch": 0.42393429800547516, + "grad_norm": 5.530727932791809, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 420475800, + "step": 4336 + }, + { + "epoch": 0.42393429800547516, + "loss": 0.060068558901548386, + "loss_ce": 0.004343460313975811, + "loss_iou": 0.2275390625, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 420475800, + "step": 4336 + }, + { + "epoch": 0.4240320688306609, + "grad_norm": 21.315325340683597, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 420571944, + "step": 4337 + }, + { + "epoch": 0.4240320688306609, + "loss": 0.07583631575107574, + "loss_ce": 0.0033875848166644573, + "loss_iou": 0.341796875, + "loss_num": 0.0145263671875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 420571944, + "step": 4337 + }, + { + "epoch": 0.4241298396558467, + "grad_norm": 18.574235263969232, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 420668924, + "step": 4338 + }, + { + "epoch": 0.4241298396558467, + "loss": 0.07448911666870117, + "loss_ce": 0.008586407639086246, + "loss_iou": 0.28515625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 420668924, + "step": 4338 + }, + { + "epoch": 0.42422761048103247, + "grad_norm": 8.1108778924165, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 420765900, + "step": 4339 + }, + { + "epoch": 0.42422761048103247, + "loss": 0.08238766342401505, + "loss_ce": 0.003385285148397088, + "loss_iou": 0.271484375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 420765900, + "step": 4339 + }, + { + "epoch": 0.4243253813062182, + "grad_norm": 2.629378669420631, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 420862332, + "step": 4340 + }, + { + "epoch": 0.4243253813062182, + "loss": 0.11662067472934723, + "loss_ce": 0.004071847535669804, + "loss_iou": 0.396484375, + "loss_num": 0.0225830078125, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 420862332, + "step": 4340 + }, + { + "epoch": 0.42442315213140397, + "grad_norm": 6.807907064277592, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 420958852, + "step": 4341 + }, + { + "epoch": 0.42442315213140397, + "loss": 0.06432638317346573, + "loss_ce": 0.006632900331169367, + "loss_iou": 0.35546875, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 420958852, + "step": 4341 + }, + { + "epoch": 0.4245209229565898, + "grad_norm": 8.777579676013973, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 421055152, + "step": 4342 + }, + { + "epoch": 0.4245209229565898, + "loss": 0.05829249322414398, + "loss_ce": 0.004825695417821407, + "loss_iou": 0.302734375, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 421055152, + "step": 4342 + }, + { + "epoch": 0.4246186937817755, + "grad_norm": 2.8607440583740984, + "learning_rate": 5e-05, + "loss": 0.0619, + "num_input_tokens_seen": 421152108, + "step": 4343 + }, + { + "epoch": 0.4246186937817755, + "loss": 0.062934011220932, + "loss_ce": 0.004859056323766708, + "loss_iou": 0.314453125, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 421152108, + "step": 4343 + }, + { + "epoch": 0.4247164646069613, + "grad_norm": 6.628374304197204, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 421248832, + "step": 4344 + }, + { + "epoch": 0.4247164646069613, + "loss": 0.054232873022556305, + "loss_ce": 0.0030548926442861557, + "loss_iou": 0.275390625, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 421248832, + "step": 4344 + }, + { + "epoch": 0.424814235432147, + "grad_norm": 4.586954405760447, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 421345900, + "step": 4345 + }, + { + "epoch": 0.424814235432147, + "loss": 0.03765207529067993, + "loss_ce": 0.0036135336849838495, + "loss_iou": 0.24609375, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 421345900, + "step": 4345 + }, + { + "epoch": 0.42491200625733283, + "grad_norm": 7.551336853875442, + "learning_rate": 5e-05, + "loss": 0.0675, + "num_input_tokens_seen": 421442252, + "step": 4346 + }, + { + "epoch": 0.42491200625733283, + "loss": 0.06058930978178978, + "loss_ce": 0.004402635619044304, + "loss_iou": 0.22265625, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 421442252, + "step": 4346 + }, + { + "epoch": 0.4250097770825186, + "grad_norm": 16.582530091376384, + "learning_rate": 5e-05, + "loss": 0.0459, + "num_input_tokens_seen": 421539308, + "step": 4347 + }, + { + "epoch": 0.4250097770825186, + "loss": 0.03944561630487442, + "loss_ce": 0.0024888282641768456, + "loss_iou": 0.287109375, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 421539308, + "step": 4347 + }, + { + "epoch": 0.42510754790770433, + "grad_norm": 22.697747305289305, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 421635764, + "step": 4348 + }, + { + "epoch": 0.42510754790770433, + "loss": 0.10236788541078568, + "loss_ce": 0.009136684238910675, + "loss_iou": 0.296875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 421635764, + "step": 4348 + }, + { + "epoch": 0.4252053187328901, + "grad_norm": 17.766612437850856, + "learning_rate": 5e-05, + "loss": 0.055, + "num_input_tokens_seen": 421733032, + "step": 4349 + }, + { + "epoch": 0.4252053187328901, + "loss": 0.05444030463695526, + "loss_ce": 0.0037353485822677612, + "loss_iou": 0.35546875, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 421733032, + "step": 4349 + }, + { + "epoch": 0.4253030895580759, + "grad_norm": 19.911504967166934, + "learning_rate": 5e-05, + "loss": 0.136, + "num_input_tokens_seen": 421830128, + "step": 4350 + }, + { + "epoch": 0.4253030895580759, + "loss": 0.13990353047847748, + "loss_ce": 0.013682827353477478, + "loss_iou": 0.27734375, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 421830128, + "step": 4350 + }, + { + "epoch": 0.42540086038326164, + "grad_norm": 10.869411218075056, + "learning_rate": 5e-05, + "loss": 0.1337, + "num_input_tokens_seen": 421927428, + "step": 4351 + }, + { + "epoch": 0.42540086038326164, + "loss": 0.1344853639602661, + "loss_ce": 0.008142590522766113, + "loss_iou": 0.43359375, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 421927428, + "step": 4351 + }, + { + "epoch": 0.4254986312084474, + "grad_norm": 10.836752315166155, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 422024684, + "step": 4352 + }, + { + "epoch": 0.4254986312084474, + "loss": 0.09256716817617416, + "loss_ce": 0.004661284852772951, + "loss_iou": 0.38671875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 422024684, + "step": 4352 + }, + { + "epoch": 0.42559640203363314, + "grad_norm": 8.870117715115061, + "learning_rate": 5e-05, + "loss": 0.075, + "num_input_tokens_seen": 422121920, + "step": 4353 + }, + { + "epoch": 0.42559640203363314, + "loss": 0.049399882555007935, + "loss_ce": 0.005477456375956535, + "loss_iou": 0.22265625, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 422121920, + "step": 4353 + }, + { + "epoch": 0.42569417285881894, + "grad_norm": 9.55814777040256, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 422217948, + "step": 4354 + }, + { + "epoch": 0.42569417285881894, + "loss": 0.09467822313308716, + "loss_ce": 0.007367430254817009, + "loss_iou": 0.259765625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 422217948, + "step": 4354 + }, + { + "epoch": 0.4257919436840047, + "grad_norm": 12.407718796025337, + "learning_rate": 5e-05, + "loss": 0.0561, + "num_input_tokens_seen": 422315128, + "step": 4355 + }, + { + "epoch": 0.4257919436840047, + "loss": 0.054011765867471695, + "loss_ce": 0.00454277079552412, + "loss_iou": 0.318359375, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 422315128, + "step": 4355 + }, + { + "epoch": 0.42588971450919044, + "grad_norm": 10.117427968328661, + "learning_rate": 5e-05, + "loss": 0.1249, + "num_input_tokens_seen": 422411952, + "step": 4356 + }, + { + "epoch": 0.42588971450919044, + "loss": 0.09743249416351318, + "loss_ce": 0.006291741505265236, + "loss_iou": 0.31640625, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 422411952, + "step": 4356 + }, + { + "epoch": 0.42598748533437625, + "grad_norm": 4.249108681708299, + "learning_rate": 5e-05, + "loss": 0.1059, + "num_input_tokens_seen": 422509520, + "step": 4357 + }, + { + "epoch": 0.42598748533437625, + "loss": 0.1353902518749237, + "loss_ce": 0.009573912248015404, + "loss_iou": 0.2392578125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 422509520, + "step": 4357 + }, + { + "epoch": 0.426085256159562, + "grad_norm": 6.602463168725424, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 422605188, + "step": 4358 + }, + { + "epoch": 0.426085256159562, + "loss": 0.0724867433309555, + "loss_ce": 0.0037611571606248617, + "loss_iou": 0.19140625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 422605188, + "step": 4358 + }, + { + "epoch": 0.42618302698474775, + "grad_norm": 5.766709047001505, + "learning_rate": 5e-05, + "loss": 0.0656, + "num_input_tokens_seen": 422702548, + "step": 4359 + }, + { + "epoch": 0.42618302698474775, + "loss": 0.06109999492764473, + "loss_ce": 0.00534056918695569, + "loss_iou": 0.20703125, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 422702548, + "step": 4359 + }, + { + "epoch": 0.4262807978099335, + "grad_norm": 5.284870123225636, + "learning_rate": 5e-05, + "loss": 0.0717, + "num_input_tokens_seen": 422799764, + "step": 4360 + }, + { + "epoch": 0.4262807978099335, + "loss": 0.05857887864112854, + "loss_ce": 0.006706622429192066, + "loss_iou": 0.359375, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 422799764, + "step": 4360 + }, + { + "epoch": 0.4263785686351193, + "grad_norm": 13.82329746925695, + "learning_rate": 5e-05, + "loss": 0.1058, + "num_input_tokens_seen": 422896476, + "step": 4361 + }, + { + "epoch": 0.4263785686351193, + "loss": 0.10166788846254349, + "loss_ce": 0.0075211599469184875, + "loss_iou": 0.318359375, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 422896476, + "step": 4361 + }, + { + "epoch": 0.42647633946030505, + "grad_norm": 4.716532262144608, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 422993048, + "step": 4362 + }, + { + "epoch": 0.42647633946030505, + "loss": 0.09675756096839905, + "loss_ce": 0.003266960382461548, + "loss_iou": 0.22265625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 422993048, + "step": 4362 + }, + { + "epoch": 0.4265741102854908, + "grad_norm": 13.509277756439307, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 423089936, + "step": 4363 + }, + { + "epoch": 0.4265741102854908, + "loss": 0.08006708323955536, + "loss_ce": 0.00748102692887187, + "loss_iou": 0.232421875, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 423089936, + "step": 4363 + }, + { + "epoch": 0.42667188111067655, + "grad_norm": 19.401314420518762, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 423186136, + "step": 4364 + }, + { + "epoch": 0.42667188111067655, + "loss": 0.0659739077091217, + "loss_ce": 0.0021463960874825716, + "loss_iou": 0.28515625, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 423186136, + "step": 4364 + }, + { + "epoch": 0.42676965193586236, + "grad_norm": 6.754655172941585, + "learning_rate": 5e-05, + "loss": 0.0905, + "num_input_tokens_seen": 423282656, + "step": 4365 + }, + { + "epoch": 0.42676965193586236, + "loss": 0.10034526884555817, + "loss_ce": 0.004916801117360592, + "loss_iou": 0.3671875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 423282656, + "step": 4365 + }, + { + "epoch": 0.4268674227610481, + "grad_norm": 9.297346137750417, + "learning_rate": 5e-05, + "loss": 0.1156, + "num_input_tokens_seen": 423378936, + "step": 4366 + }, + { + "epoch": 0.4268674227610481, + "loss": 0.11778756976127625, + "loss_ce": 0.011372772045433521, + "loss_iou": 0.3515625, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 423378936, + "step": 4366 + }, + { + "epoch": 0.42696519358623386, + "grad_norm": 9.743787577398118, + "learning_rate": 5e-05, + "loss": 0.0997, + "num_input_tokens_seen": 423475628, + "step": 4367 + }, + { + "epoch": 0.42696519358623386, + "loss": 0.08917547762393951, + "loss_ce": 0.008395451121032238, + "loss_iou": 0.263671875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 423475628, + "step": 4367 + }, + { + "epoch": 0.4270629644114196, + "grad_norm": 36.087431678723306, + "learning_rate": 5e-05, + "loss": 0.1139, + "num_input_tokens_seen": 423572924, + "step": 4368 + }, + { + "epoch": 0.4270629644114196, + "loss": 0.11520880460739136, + "loss_ce": 0.005894835107028484, + "loss_iou": 0.3515625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 423572924, + "step": 4368 + }, + { + "epoch": 0.4271607352366054, + "grad_norm": 5.533363367156435, + "learning_rate": 5e-05, + "loss": 0.1078, + "num_input_tokens_seen": 423669416, + "step": 4369 + }, + { + "epoch": 0.4271607352366054, + "loss": 0.11031553149223328, + "loss_ce": 0.007059300318360329, + "loss_iou": 0.404296875, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 423669416, + "step": 4369 + }, + { + "epoch": 0.42725850606179117, + "grad_norm": 5.300064736156603, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 423765804, + "step": 4370 + }, + { + "epoch": 0.42725850606179117, + "loss": 0.07301817089319229, + "loss_ce": 0.005238630808889866, + "loss_iou": 0.380859375, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 423765804, + "step": 4370 + }, + { + "epoch": 0.4273562768869769, + "grad_norm": 24.37292550879863, + "learning_rate": 5e-05, + "loss": 0.1006, + "num_input_tokens_seen": 423863100, + "step": 4371 + }, + { + "epoch": 0.4273562768869769, + "loss": 0.12463244795799255, + "loss_ce": 0.009611694142222404, + "loss_iou": 0.37109375, + "loss_num": 0.02294921875, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 423863100, + "step": 4371 + }, + { + "epoch": 0.42745404771216267, + "grad_norm": 25.012071738812285, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 423961064, + "step": 4372 + }, + { + "epoch": 0.42745404771216267, + "loss": 0.08965447545051575, + "loss_ce": 0.0025573093444108963, + "loss_iou": 0.384765625, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 423961064, + "step": 4372 + }, + { + "epoch": 0.42755181853734847, + "grad_norm": 18.301103130539705, + "learning_rate": 5e-05, + "loss": 0.125, + "num_input_tokens_seen": 424057160, + "step": 4373 + }, + { + "epoch": 0.42755181853734847, + "loss": 0.1622461974620819, + "loss_ce": 0.007049046456813812, + "loss_iou": 0.3671875, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 424057160, + "step": 4373 + }, + { + "epoch": 0.4276495893625342, + "grad_norm": 14.637290654896793, + "learning_rate": 5e-05, + "loss": 0.1031, + "num_input_tokens_seen": 424154356, + "step": 4374 + }, + { + "epoch": 0.4276495893625342, + "loss": 0.08471611142158508, + "loss_ce": 0.004927906207740307, + "loss_iou": 0.3203125, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 424154356, + "step": 4374 + }, + { + "epoch": 0.42774736018771997, + "grad_norm": 15.028266971661873, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 424250716, + "step": 4375 + }, + { + "epoch": 0.42774736018771997, + "loss": 0.06974144279956818, + "loss_ce": 0.003388599958270788, + "loss_iou": 0.291015625, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 424250716, + "step": 4375 + }, + { + "epoch": 0.4278451310129057, + "grad_norm": 15.001272681832003, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 424346640, + "step": 4376 + }, + { + "epoch": 0.4278451310129057, + "loss": 0.08049048483371735, + "loss_ce": 0.0026401476934552193, + "loss_iou": 0.259765625, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 424346640, + "step": 4376 + }, + { + "epoch": 0.4279429018380915, + "grad_norm": 4.982346822446547, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 424443720, + "step": 4377 + }, + { + "epoch": 0.4279429018380915, + "loss": 0.055334266275167465, + "loss_ce": 0.0027372180484235287, + "loss_iou": 0.27734375, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 424443720, + "step": 4377 + }, + { + "epoch": 0.4280406726632773, + "grad_norm": 5.752237679392559, + "learning_rate": 5e-05, + "loss": 0.0871, + "num_input_tokens_seen": 424539772, + "step": 4378 + }, + { + "epoch": 0.4280406726632773, + "loss": 0.11248612403869629, + "loss_ce": 0.0051710596308112144, + "loss_iou": 0.267578125, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 424539772, + "step": 4378 + }, + { + "epoch": 0.428138443488463, + "grad_norm": 5.298890275993637, + "learning_rate": 5e-05, + "loss": 0.0604, + "num_input_tokens_seen": 424635812, + "step": 4379 + }, + { + "epoch": 0.428138443488463, + "loss": 0.07355639338493347, + "loss_ce": 0.007028074935078621, + "loss_iou": 0.2294921875, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 424635812, + "step": 4379 + }, + { + "epoch": 0.42823621431364883, + "grad_norm": 9.071159387891603, + "learning_rate": 5e-05, + "loss": 0.104, + "num_input_tokens_seen": 424734308, + "step": 4380 + }, + { + "epoch": 0.42823621431364883, + "loss": 0.1183490976691246, + "loss_ce": 0.007402443327009678, + "loss_iou": 0.26953125, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 424734308, + "step": 4380 + }, + { + "epoch": 0.4283339851388346, + "grad_norm": 3.644484641120805, + "learning_rate": 5e-05, + "loss": 0.0963, + "num_input_tokens_seen": 424831148, + "step": 4381 + }, + { + "epoch": 0.4283339851388346, + "loss": 0.09569823741912842, + "loss_ce": 0.005007622763514519, + "loss_iou": 0.22265625, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 424831148, + "step": 4381 + }, + { + "epoch": 0.42843175596402033, + "grad_norm": 4.625150100546664, + "learning_rate": 5e-05, + "loss": 0.0662, + "num_input_tokens_seen": 424928324, + "step": 4382 + }, + { + "epoch": 0.42843175596402033, + "loss": 0.06129010021686554, + "loss_ce": 0.002230958314612508, + "loss_iou": 0.302734375, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 424928324, + "step": 4382 + }, + { + "epoch": 0.4285295267892061, + "grad_norm": 5.877388846233216, + "learning_rate": 5e-05, + "loss": 0.1232, + "num_input_tokens_seen": 425025992, + "step": 4383 + }, + { + "epoch": 0.4285295267892061, + "loss": 0.1317126452922821, + "loss_ce": 0.007811271585524082, + "loss_iou": 0.35546875, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 425025992, + "step": 4383 + }, + { + "epoch": 0.4286272976143919, + "grad_norm": 10.211122061822058, + "learning_rate": 5e-05, + "loss": 0.0475, + "num_input_tokens_seen": 425121464, + "step": 4384 + }, + { + "epoch": 0.4286272976143919, + "loss": 0.02619647979736328, + "loss_ce": 0.0038118374068289995, + "loss_iou": 0.173828125, + "loss_num": 0.004486083984375, + "loss_xval": 0.0223388671875, + "num_input_tokens_seen": 425121464, + "step": 4384 + }, + { + "epoch": 0.42872506843957764, + "grad_norm": 3.7031643210400476, + "learning_rate": 5e-05, + "loss": 0.1026, + "num_input_tokens_seen": 425218984, + "step": 4385 + }, + { + "epoch": 0.42872506843957764, + "loss": 0.10436126589775085, + "loss_ce": 0.009360047988593578, + "loss_iou": 0.28515625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 425218984, + "step": 4385 + }, + { + "epoch": 0.4288228392647634, + "grad_norm": 16.273249290395572, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 425316344, + "step": 4386 + }, + { + "epoch": 0.4288228392647634, + "loss": 0.06487719714641571, + "loss_ce": 0.004940670914947987, + "loss_iou": 0.294921875, + "loss_num": 0.011962890625, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 425316344, + "step": 4386 + }, + { + "epoch": 0.42892061008994914, + "grad_norm": 17.6811729299125, + "learning_rate": 5e-05, + "loss": 0.1063, + "num_input_tokens_seen": 425413316, + "step": 4387 + }, + { + "epoch": 0.42892061008994914, + "loss": 0.10869479179382324, + "loss_ce": 0.007223843596875668, + "loss_iou": 0.44140625, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 425413316, + "step": 4387 + }, + { + "epoch": 0.42901838091513494, + "grad_norm": 11.631369140412497, + "learning_rate": 5e-05, + "loss": 0.0938, + "num_input_tokens_seen": 425510800, + "step": 4388 + }, + { + "epoch": 0.42901838091513494, + "loss": 0.08684371411800385, + "loss_ce": 0.003820638405159116, + "loss_iou": 0.373046875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 425510800, + "step": 4388 + }, + { + "epoch": 0.4291161517403207, + "grad_norm": 8.252125653154458, + "learning_rate": 5e-05, + "loss": 0.0601, + "num_input_tokens_seen": 425608560, + "step": 4389 + }, + { + "epoch": 0.4291161517403207, + "loss": 0.056419115513563156, + "loss_ce": 0.0023572242353111506, + "loss_iou": 0.306640625, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 425608560, + "step": 4389 + }, + { + "epoch": 0.42921392256550644, + "grad_norm": 13.359520988024164, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 425705880, + "step": 4390 + }, + { + "epoch": 0.42921392256550644, + "loss": 0.07183381170034409, + "loss_ce": 0.004496776033192873, + "loss_iou": 0.296875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 425705880, + "step": 4390 + }, + { + "epoch": 0.4293116933906922, + "grad_norm": 5.019406396782088, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 425802516, + "step": 4391 + }, + { + "epoch": 0.4293116933906922, + "loss": 0.10003595054149628, + "loss_ce": 0.003981872461736202, + "loss_iou": 0.349609375, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 425802516, + "step": 4391 + }, + { + "epoch": 0.429409464215878, + "grad_norm": 4.662506379194952, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 425900416, + "step": 4392 + }, + { + "epoch": 0.429409464215878, + "loss": 0.09077456593513489, + "loss_ce": 0.008605989627540112, + "loss_iou": 0.3828125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 425900416, + "step": 4392 + }, + { + "epoch": 0.42950723504106375, + "grad_norm": 7.378710337452475, + "learning_rate": 5e-05, + "loss": 0.1239, + "num_input_tokens_seen": 425997008, + "step": 4393 + }, + { + "epoch": 0.42950723504106375, + "loss": 0.14601007103919983, + "loss_ce": 0.00825372152030468, + "loss_iou": 0.31640625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 425997008, + "step": 4393 + }, + { + "epoch": 0.4296050058662495, + "grad_norm": 6.815663692747447, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 426094176, + "step": 4394 + }, + { + "epoch": 0.4296050058662495, + "loss": 0.08818955719470978, + "loss_ce": 0.004785018041729927, + "loss_iou": 0.283203125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 426094176, + "step": 4394 + }, + { + "epoch": 0.42970277669143525, + "grad_norm": 7.558633991340826, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 426191092, + "step": 4395 + }, + { + "epoch": 0.42970277669143525, + "loss": 0.08839981257915497, + "loss_ce": 0.008733665570616722, + "loss_iou": 0.294921875, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 426191092, + "step": 4395 + }, + { + "epoch": 0.42980054751662106, + "grad_norm": 3.5218931737328933, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 426288100, + "step": 4396 + }, + { + "epoch": 0.42980054751662106, + "loss": 0.07453376054763794, + "loss_ce": 0.006342232692986727, + "loss_iou": 0.279296875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 426288100, + "step": 4396 + }, + { + "epoch": 0.4298983183418068, + "grad_norm": 8.838478079354038, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 426385504, + "step": 4397 + }, + { + "epoch": 0.4298983183418068, + "loss": 0.0654301717877388, + "loss_ce": 0.005608091130852699, + "loss_iou": 0.31640625, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 426385504, + "step": 4397 + }, + { + "epoch": 0.42999608916699256, + "grad_norm": 29.496631293828937, + "learning_rate": 5e-05, + "loss": 0.1062, + "num_input_tokens_seen": 426480612, + "step": 4398 + }, + { + "epoch": 0.42999608916699256, + "loss": 0.10570955276489258, + "loss_ce": 0.00662660738453269, + "loss_iou": 0.22265625, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 426480612, + "step": 4398 + }, + { + "epoch": 0.4300938599921783, + "grad_norm": 8.723463471907726, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 426577600, + "step": 4399 + }, + { + "epoch": 0.4300938599921783, + "loss": 0.10345019400119781, + "loss_ce": 0.003413574304431677, + "loss_iou": 0.248046875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 426577600, + "step": 4399 + }, + { + "epoch": 0.4301916308173641, + "grad_norm": 3.4269814212067202, + "learning_rate": 5e-05, + "loss": 0.0726, + "num_input_tokens_seen": 426674660, + "step": 4400 + }, + { + "epoch": 0.4301916308173641, + "loss": 0.04624273255467415, + "loss_ce": 0.004349729511886835, + "loss_iou": 0.26953125, + "loss_num": 0.00836181640625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 426674660, + "step": 4400 + }, + { + "epoch": 0.43028940164254986, + "grad_norm": 3.1777982839770904, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 426771640, + "step": 4401 + }, + { + "epoch": 0.43028940164254986, + "loss": 0.10788913071155548, + "loss_ce": 0.0034579753410071135, + "loss_iou": 0.287109375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 426771640, + "step": 4401 + }, + { + "epoch": 0.4303871724677356, + "grad_norm": 2.9996373173568918, + "learning_rate": 5e-05, + "loss": 0.0865, + "num_input_tokens_seen": 426869168, + "step": 4402 + }, + { + "epoch": 0.4303871724677356, + "loss": 0.08849942684173584, + "loss_ce": 0.004148844629526138, + "loss_iou": 0.30078125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 426869168, + "step": 4402 + }, + { + "epoch": 0.4304849432929214, + "grad_norm": 5.252805150814176, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 426966040, + "step": 4403 + }, + { + "epoch": 0.4304849432929214, + "loss": 0.07646152377128601, + "loss_ce": 0.0026318682357668877, + "loss_iou": 0.2890625, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 426966040, + "step": 4403 + }, + { + "epoch": 0.43058271411810717, + "grad_norm": 10.239360330557442, + "learning_rate": 5e-05, + "loss": 0.073, + "num_input_tokens_seen": 427063604, + "step": 4404 + }, + { + "epoch": 0.43058271411810717, + "loss": 0.06275898963212967, + "loss_ce": 0.004302570130676031, + "loss_iou": 0.310546875, + "loss_num": 0.01171875, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 427063604, + "step": 4404 + }, + { + "epoch": 0.4306804849432929, + "grad_norm": 12.095613305252815, + "learning_rate": 5e-05, + "loss": 0.0647, + "num_input_tokens_seen": 427161352, + "step": 4405 + }, + { + "epoch": 0.4306804849432929, + "loss": 0.06318323314189911, + "loss_ce": 0.00420800969004631, + "loss_iou": 0.419921875, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 427161352, + "step": 4405 + }, + { + "epoch": 0.43077825576847867, + "grad_norm": 7.276738026195203, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 427258016, + "step": 4406 + }, + { + "epoch": 0.43077825576847867, + "loss": 0.05038828030228615, + "loss_ce": 0.0051307110115885735, + "loss_iou": 0.30859375, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 427258016, + "step": 4406 + }, + { + "epoch": 0.4308760265936645, + "grad_norm": 19.97132126344462, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 427355760, + "step": 4407 + }, + { + "epoch": 0.4308760265936645, + "loss": 0.05152326449751854, + "loss_ce": 0.007242257706820965, + "loss_iou": 0.267578125, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 427355760, + "step": 4407 + }, + { + "epoch": 0.4309737974188502, + "grad_norm": 7.369045556771297, + "learning_rate": 5e-05, + "loss": 0.0516, + "num_input_tokens_seen": 427452576, + "step": 4408 + }, + { + "epoch": 0.4309737974188502, + "loss": 0.051326312124729156, + "loss_ce": 0.00640443991869688, + "loss_iou": 0.2890625, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 427452576, + "step": 4408 + }, + { + "epoch": 0.431071568244036, + "grad_norm": 13.131402169374878, + "learning_rate": 5e-05, + "loss": 0.1138, + "num_input_tokens_seen": 427549648, + "step": 4409 + }, + { + "epoch": 0.431071568244036, + "loss": 0.15791141986846924, + "loss_ce": 0.008741491474211216, + "loss_iou": 0.3984375, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 427549648, + "step": 4409 + }, + { + "epoch": 0.4311693390692217, + "grad_norm": 8.620842148172072, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 427646892, + "step": 4410 + }, + { + "epoch": 0.4311693390692217, + "loss": 0.07292921841144562, + "loss_ce": 0.004722435027360916, + "loss_iou": 0.251953125, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 427646892, + "step": 4410 + }, + { + "epoch": 0.43126710989440753, + "grad_norm": 2.3699413202618502, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 427744844, + "step": 4411 + }, + { + "epoch": 0.43126710989440753, + "loss": 0.0859704464673996, + "loss_ce": 0.002108142711222172, + "loss_iou": 0.365234375, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 427744844, + "step": 4411 + }, + { + "epoch": 0.4313648807195933, + "grad_norm": 8.62872833730217, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 427841360, + "step": 4412 + }, + { + "epoch": 0.4313648807195933, + "loss": 0.05272083729505539, + "loss_ce": 0.006074718665331602, + "loss_iou": 0.32421875, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 427841360, + "step": 4412 + }, + { + "epoch": 0.43146265154477903, + "grad_norm": 3.742365068225639, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 427938640, + "step": 4413 + }, + { + "epoch": 0.43146265154477903, + "loss": 0.0900714099407196, + "loss_ce": 0.005995487794280052, + "loss_iou": 0.267578125, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 427938640, + "step": 4413 + }, + { + "epoch": 0.4315604223699648, + "grad_norm": 14.91920146127941, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 428034036, + "step": 4414 + }, + { + "epoch": 0.4315604223699648, + "loss": 0.06075626239180565, + "loss_ce": 0.006100233644247055, + "loss_iou": 0.1318359375, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 428034036, + "step": 4414 + }, + { + "epoch": 0.4316581931951506, + "grad_norm": 2.310290252703083, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 428130896, + "step": 4415 + }, + { + "epoch": 0.4316581931951506, + "loss": 0.054855410009622574, + "loss_ce": 0.006126462947577238, + "loss_iou": 0.19140625, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 428130896, + "step": 4415 + }, + { + "epoch": 0.43175596402033634, + "grad_norm": 8.414275818118771, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 428227668, + "step": 4416 + }, + { + "epoch": 0.43175596402033634, + "loss": 0.06286482512950897, + "loss_ce": 0.0049119386821985245, + "loss_iou": 0.345703125, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 428227668, + "step": 4416 + }, + { + "epoch": 0.4318537348455221, + "grad_norm": 5.052648919484213, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 428324232, + "step": 4417 + }, + { + "epoch": 0.4318537348455221, + "loss": 0.05582624673843384, + "loss_ce": 0.004846632946282625, + "loss_iou": 0.208984375, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 428324232, + "step": 4417 + }, + { + "epoch": 0.43195150567070784, + "grad_norm": 19.81687864565479, + "learning_rate": 5e-05, + "loss": 0.0524, + "num_input_tokens_seen": 428420604, + "step": 4418 + }, + { + "epoch": 0.43195150567070784, + "loss": 0.05419984832406044, + "loss_ce": 0.002960836049169302, + "loss_iou": 0.302734375, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 428420604, + "step": 4418 + }, + { + "epoch": 0.43204927649589364, + "grad_norm": 11.791169791240879, + "learning_rate": 5e-05, + "loss": 0.0932, + "num_input_tokens_seen": 428517236, + "step": 4419 + }, + { + "epoch": 0.43204927649589364, + "loss": 0.07273314893245697, + "loss_ce": 0.004083859734237194, + "loss_iou": 0.26171875, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 428517236, + "step": 4419 + }, + { + "epoch": 0.4321470473210794, + "grad_norm": 10.063662911437573, + "learning_rate": 5e-05, + "loss": 0.0964, + "num_input_tokens_seen": 428614692, + "step": 4420 + }, + { + "epoch": 0.4321470473210794, + "loss": 0.0645381435751915, + "loss_ce": 0.005166199058294296, + "loss_iou": 0.302734375, + "loss_num": 0.01190185546875, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 428614692, + "step": 4420 + }, + { + "epoch": 0.43224481814626514, + "grad_norm": 19.48894253375992, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 428711360, + "step": 4421 + }, + { + "epoch": 0.43224481814626514, + "loss": 0.07427091151475906, + "loss_ce": 0.0028750342316925526, + "loss_iou": 0.328125, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 428711360, + "step": 4421 + }, + { + "epoch": 0.4323425889714509, + "grad_norm": 42.87687431942587, + "learning_rate": 5e-05, + "loss": 0.1006, + "num_input_tokens_seen": 428808448, + "step": 4422 + }, + { + "epoch": 0.4323425889714509, + "loss": 0.09864494949579239, + "loss_ce": 0.0026976787485182285, + "loss_iou": 0.359375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 428808448, + "step": 4422 + }, + { + "epoch": 0.4324403597966367, + "grad_norm": 7.807553237298168, + "learning_rate": 5e-05, + "loss": 0.1001, + "num_input_tokens_seen": 428905900, + "step": 4423 + }, + { + "epoch": 0.4324403597966367, + "loss": 0.09501263499259949, + "loss_ce": 0.006374320946633816, + "loss_iou": 0.263671875, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 428905900, + "step": 4423 + }, + { + "epoch": 0.43253813062182245, + "grad_norm": 15.95564545161763, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 429002908, + "step": 4424 + }, + { + "epoch": 0.43253813062182245, + "loss": 0.087257981300354, + "loss_ce": 0.0040060230530798435, + "loss_iou": 0.2578125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 429002908, + "step": 4424 + }, + { + "epoch": 0.4326359014470082, + "grad_norm": 2.3170576558628797, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 429100500, + "step": 4425 + }, + { + "epoch": 0.4326359014470082, + "loss": 0.12600359320640564, + "loss_ce": 0.00368913309648633, + "loss_iou": 0.296875, + "loss_num": 0.0245361328125, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 429100500, + "step": 4425 + }, + { + "epoch": 0.432733672272194, + "grad_norm": 7.544642597614895, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 429198072, + "step": 4426 + }, + { + "epoch": 0.432733672272194, + "loss": 0.11220046877861023, + "loss_ce": 0.007616729475557804, + "loss_iou": 0.37109375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 429198072, + "step": 4426 + }, + { + "epoch": 0.43283144309737975, + "grad_norm": 5.1948222709922645, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 429295428, + "step": 4427 + }, + { + "epoch": 0.43283144309737975, + "loss": 0.10082019865512848, + "loss_ce": 0.0077110715210437775, + "loss_iou": 0.291015625, + "loss_num": 0.0185546875, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 429295428, + "step": 4427 + }, + { + "epoch": 0.4329292139225655, + "grad_norm": 10.692701353222919, + "learning_rate": 5e-05, + "loss": 0.075, + "num_input_tokens_seen": 429392064, + "step": 4428 + }, + { + "epoch": 0.4329292139225655, + "loss": 0.05003386735916138, + "loss_ce": 0.005340876057744026, + "loss_iou": 0.2490234375, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 429392064, + "step": 4428 + }, + { + "epoch": 0.43302698474775125, + "grad_norm": 10.540105958981718, + "learning_rate": 5e-05, + "loss": 0.1118, + "num_input_tokens_seen": 429489236, + "step": 4429 + }, + { + "epoch": 0.43302698474775125, + "loss": 0.16094276309013367, + "loss_ce": 0.007668226957321167, + "loss_iou": 0.26953125, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 429489236, + "step": 4429 + }, + { + "epoch": 0.43312475557293706, + "grad_norm": 3.734038531601081, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 429586752, + "step": 4430 + }, + { + "epoch": 0.43312475557293706, + "loss": 0.08291532099246979, + "loss_ce": 0.007597936317324638, + "loss_iou": 0.298828125, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 429586752, + "step": 4430 + }, + { + "epoch": 0.4332225263981228, + "grad_norm": 1.400241063620843, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 429684008, + "step": 4431 + }, + { + "epoch": 0.4332225263981228, + "loss": 0.07602181285619736, + "loss_ce": 0.004191058687865734, + "loss_iou": 0.251953125, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 429684008, + "step": 4431 + }, + { + "epoch": 0.43332029722330856, + "grad_norm": 2.513325106971147, + "learning_rate": 5e-05, + "loss": 0.042, + "num_input_tokens_seen": 429780612, + "step": 4432 + }, + { + "epoch": 0.43332029722330856, + "loss": 0.030896257609128952, + "loss_ce": 0.0040198080241680145, + "loss_iou": 0.248046875, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 429780612, + "step": 4432 + }, + { + "epoch": 0.4334180680484943, + "grad_norm": 8.164135137080638, + "learning_rate": 5e-05, + "loss": 0.1127, + "num_input_tokens_seen": 429877644, + "step": 4433 + }, + { + "epoch": 0.4334180680484943, + "loss": 0.1433185189962387, + "loss_ce": 0.010139808990061283, + "loss_iou": 0.365234375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 429877644, + "step": 4433 + }, + { + "epoch": 0.4335158388736801, + "grad_norm": 29.34164394256731, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 429974356, + "step": 4434 + }, + { + "epoch": 0.4335158388736801, + "loss": 0.0973743349313736, + "loss_ce": 0.004784009885042906, + "loss_iou": 0.34375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 429974356, + "step": 4434 + }, + { + "epoch": 0.43361360969886587, + "grad_norm": 10.600771194632287, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 430071760, + "step": 4435 + }, + { + "epoch": 0.43361360969886587, + "loss": 0.07705722749233246, + "loss_ce": 0.00687156617641449, + "loss_iou": 0.328125, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 430071760, + "step": 4435 + }, + { + "epoch": 0.4337113805240516, + "grad_norm": 5.663699912026772, + "learning_rate": 5e-05, + "loss": 0.0919, + "num_input_tokens_seen": 430168524, + "step": 4436 + }, + { + "epoch": 0.4337113805240516, + "loss": 0.07737770676612854, + "loss_ce": 0.004837420769035816, + "loss_iou": 0.373046875, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 430168524, + "step": 4436 + }, + { + "epoch": 0.43380915134923737, + "grad_norm": 5.026273085981906, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 430264964, + "step": 4437 + }, + { + "epoch": 0.43380915134923737, + "loss": 0.044491756707429886, + "loss_ce": 0.0031862130854278803, + "loss_iou": 0.326171875, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 430264964, + "step": 4437 + }, + { + "epoch": 0.43390692217442317, + "grad_norm": 5.477043273836816, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 430361432, + "step": 4438 + }, + { + "epoch": 0.43390692217442317, + "loss": 0.0786576047539711, + "loss_ce": 0.0027756495401263237, + "loss_iou": 0.326171875, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 430361432, + "step": 4438 + }, + { + "epoch": 0.4340046929996089, + "grad_norm": 5.211719064552949, + "learning_rate": 5e-05, + "loss": 0.1081, + "num_input_tokens_seen": 430459600, + "step": 4439 + }, + { + "epoch": 0.4340046929996089, + "loss": 0.12461424618959427, + "loss_ce": 0.006755359470844269, + "loss_iou": 0.2890625, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 430459600, + "step": 4439 + }, + { + "epoch": 0.43410246382479467, + "grad_norm": 22.6266375020563, + "learning_rate": 5e-05, + "loss": 0.0995, + "num_input_tokens_seen": 430557176, + "step": 4440 + }, + { + "epoch": 0.43410246382479467, + "loss": 0.11823049932718277, + "loss_ce": 0.008489282801747322, + "loss_iou": 0.35546875, + "loss_num": 0.0218505859375, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 430557176, + "step": 4440 + }, + { + "epoch": 0.4342002346499804, + "grad_norm": 17.5163759725068, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 430654404, + "step": 4441 + }, + { + "epoch": 0.4342002346499804, + "loss": 0.07199829071760178, + "loss_ce": 0.004768067039549351, + "loss_iou": 0.384765625, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 430654404, + "step": 4441 + }, + { + "epoch": 0.4342980054751662, + "grad_norm": 10.022359632139711, + "learning_rate": 5e-05, + "loss": 0.1074, + "num_input_tokens_seen": 430750932, + "step": 4442 + }, + { + "epoch": 0.4342980054751662, + "loss": 0.11840847134590149, + "loss_ce": 0.008209498599171638, + "loss_iou": 0.349609375, + "loss_num": 0.02197265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 430750932, + "step": 4442 + }, + { + "epoch": 0.434395776300352, + "grad_norm": 3.8322605343766596, + "learning_rate": 5e-05, + "loss": 0.0959, + "num_input_tokens_seen": 430848152, + "step": 4443 + }, + { + "epoch": 0.434395776300352, + "loss": 0.11733930557966232, + "loss_ce": 0.010237868875265121, + "loss_iou": 0.24609375, + "loss_num": 0.021484375, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 430848152, + "step": 4443 + }, + { + "epoch": 0.4344935471255377, + "grad_norm": 4.252807464541821, + "learning_rate": 5e-05, + "loss": 0.1073, + "num_input_tokens_seen": 430945216, + "step": 4444 + }, + { + "epoch": 0.4344935471255377, + "loss": 0.11803822964429855, + "loss_ce": 0.009471945464611053, + "loss_iou": 0.166015625, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 430945216, + "step": 4444 + }, + { + "epoch": 0.4345913179507235, + "grad_norm": 2.608406485789666, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 431042968, + "step": 4445 + }, + { + "epoch": 0.4345913179507235, + "loss": 0.06314563751220703, + "loss_ce": 0.003819460514932871, + "loss_iou": 0.26953125, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 431042968, + "step": 4445 + }, + { + "epoch": 0.4346890887759093, + "grad_norm": 7.014635176480863, + "learning_rate": 5e-05, + "loss": 0.1308, + "num_input_tokens_seen": 431139432, + "step": 4446 + }, + { + "epoch": 0.4346890887759093, + "loss": 0.1586100310087204, + "loss_ce": 0.004496261477470398, + "loss_iou": 0.31640625, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 431139432, + "step": 4446 + }, + { + "epoch": 0.43478685960109503, + "grad_norm": 10.010511799335434, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 431237188, + "step": 4447 + }, + { + "epoch": 0.43478685960109503, + "loss": 0.07108282297849655, + "loss_ce": 0.005035155452787876, + "loss_iou": 0.3515625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 431237188, + "step": 4447 + }, + { + "epoch": 0.4348846304262808, + "grad_norm": 14.614856843520762, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 431335096, + "step": 4448 + }, + { + "epoch": 0.4348846304262808, + "loss": 0.05473899096250534, + "loss_ce": 0.0022563845850527287, + "loss_iou": 0.36328125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 431335096, + "step": 4448 + }, + { + "epoch": 0.4349824012514666, + "grad_norm": 14.858007582913235, + "learning_rate": 5e-05, + "loss": 0.0729, + "num_input_tokens_seen": 431432084, + "step": 4449 + }, + { + "epoch": 0.4349824012514666, + "loss": 0.050650596618652344, + "loss_ce": 0.004553795792162418, + "loss_iou": 0.275390625, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 431432084, + "step": 4449 + }, + { + "epoch": 0.43508017207665234, + "grad_norm": 8.84164945977025, + "learning_rate": 5e-05, + "loss": 0.0742, + "num_input_tokens_seen": 431528720, + "step": 4450 + }, + { + "epoch": 0.43508017207665234, + "loss": 0.07640984654426575, + "loss_ce": 0.004937686026096344, + "loss_iou": 0.2578125, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 431528720, + "step": 4450 + }, + { + "epoch": 0.4351779429018381, + "grad_norm": 6.546917659007804, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 431625596, + "step": 4451 + }, + { + "epoch": 0.4351779429018381, + "loss": 0.07887868583202362, + "loss_ce": 0.005819610320031643, + "loss_iou": 0.37109375, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 431625596, + "step": 4451 + }, + { + "epoch": 0.43527571372702384, + "grad_norm": 8.917577097933833, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 431721944, + "step": 4452 + }, + { + "epoch": 0.43527571372702384, + "loss": 0.07194145023822784, + "loss_ce": 0.003826208645477891, + "loss_iou": 0.25390625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 431721944, + "step": 4452 + }, + { + "epoch": 0.43537348455220964, + "grad_norm": 22.162315532738003, + "learning_rate": 5e-05, + "loss": 0.0931, + "num_input_tokens_seen": 431819104, + "step": 4453 + }, + { + "epoch": 0.43537348455220964, + "loss": 0.09501144289970398, + "loss_ce": 0.006739352829754353, + "loss_iou": 0.3359375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 431819104, + "step": 4453 + }, + { + "epoch": 0.4354712553773954, + "grad_norm": 17.264049352806712, + "learning_rate": 5e-05, + "loss": 0.123, + "num_input_tokens_seen": 431915512, + "step": 4454 + }, + { + "epoch": 0.4354712553773954, + "loss": 0.13660769164562225, + "loss_ce": 0.008281276561319828, + "loss_iou": 0.3125, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 431915512, + "step": 4454 + }, + { + "epoch": 0.43556902620258114, + "grad_norm": 6.407320299807207, + "learning_rate": 5e-05, + "loss": 0.0988, + "num_input_tokens_seen": 432012504, + "step": 4455 + }, + { + "epoch": 0.43556902620258114, + "loss": 0.07691404223442078, + "loss_ce": 0.00661679869517684, + "loss_iou": 0.265625, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 432012504, + "step": 4455 + }, + { + "epoch": 0.4356667970277669, + "grad_norm": 9.782591653272313, + "learning_rate": 5e-05, + "loss": 0.1023, + "num_input_tokens_seen": 432109184, + "step": 4456 + }, + { + "epoch": 0.4356667970277669, + "loss": 0.06653466820716858, + "loss_ce": 0.002035771729424596, + "loss_iou": 0.271484375, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 432109184, + "step": 4456 + }, + { + "epoch": 0.4357645678529527, + "grad_norm": 7.829552930314732, + "learning_rate": 5e-05, + "loss": 0.0528, + "num_input_tokens_seen": 432206144, + "step": 4457 + }, + { + "epoch": 0.4357645678529527, + "loss": 0.02553040161728859, + "loss_ce": 0.004250114317983389, + "loss_iou": 0.275390625, + "loss_num": 0.004241943359375, + "loss_xval": 0.021240234375, + "num_input_tokens_seen": 432206144, + "step": 4457 + }, + { + "epoch": 0.43586233867813845, + "grad_norm": 3.3855907440490043, + "learning_rate": 5e-05, + "loss": 0.1272, + "num_input_tokens_seen": 432304336, + "step": 4458 + }, + { + "epoch": 0.43586233867813845, + "loss": 0.14416559040546417, + "loss_ce": 0.0039068046025931835, + "loss_iou": 0.2578125, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 432304336, + "step": 4458 + }, + { + "epoch": 0.4359601095033242, + "grad_norm": 27.879039968007124, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 432400068, + "step": 4459 + }, + { + "epoch": 0.4359601095033242, + "loss": 0.09336089342832565, + "loss_ce": 0.006164541933685541, + "loss_iou": 0.478515625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 432400068, + "step": 4459 + }, + { + "epoch": 0.43605788032850995, + "grad_norm": 17.176567686535048, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 432497140, + "step": 4460 + }, + { + "epoch": 0.43605788032850995, + "loss": 0.07817502319812775, + "loss_ce": 0.006046721711754799, + "loss_iou": 0.337890625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 432497140, + "step": 4460 + }, + { + "epoch": 0.43615565115369576, + "grad_norm": 4.0120172259516425, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 432593632, + "step": 4461 + }, + { + "epoch": 0.43615565115369576, + "loss": 0.06303079426288605, + "loss_ce": 0.0032468580175191164, + "loss_iou": 0.287109375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 432593632, + "step": 4461 + }, + { + "epoch": 0.4362534219788815, + "grad_norm": 3.051842080391557, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 432690688, + "step": 4462 + }, + { + "epoch": 0.4362534219788815, + "loss": 0.1059708297252655, + "loss_ce": 0.005522225052118301, + "loss_iou": 0.359375, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 432690688, + "step": 4462 + }, + { + "epoch": 0.43635119280406726, + "grad_norm": 17.086322334184047, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 432788084, + "step": 4463 + }, + { + "epoch": 0.43635119280406726, + "loss": 0.0703459084033966, + "loss_ce": 0.005305319093167782, + "loss_iou": 0.328125, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 432788084, + "step": 4463 + }, + { + "epoch": 0.436448963629253, + "grad_norm": 14.670100180168788, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 432884504, + "step": 4464 + }, + { + "epoch": 0.436448963629253, + "loss": 0.08066913485527039, + "loss_ce": 0.003764832392334938, + "loss_iou": 0.298828125, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 432884504, + "step": 4464 + }, + { + "epoch": 0.4365467344544388, + "grad_norm": 3.023947231019086, + "learning_rate": 5e-05, + "loss": 0.0965, + "num_input_tokens_seen": 432981448, + "step": 4465 + }, + { + "epoch": 0.4365467344544388, + "loss": 0.11214201152324677, + "loss_ce": 0.010693958029150963, + "loss_iou": 0.22265625, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 432981448, + "step": 4465 + }, + { + "epoch": 0.43664450527962456, + "grad_norm": 9.413827201969761, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 433077516, + "step": 4466 + }, + { + "epoch": 0.43664450527962456, + "loss": 0.1004626527428627, + "loss_ce": 0.002035834128037095, + "loss_iou": 0.2431640625, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 433077516, + "step": 4466 + }, + { + "epoch": 0.4367422761048103, + "grad_norm": 8.33632021734631, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 433173208, + "step": 4467 + }, + { + "epoch": 0.4367422761048103, + "loss": 0.07766810059547424, + "loss_ce": 0.0035027549602091312, + "loss_iou": 0.333984375, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 433173208, + "step": 4467 + }, + { + "epoch": 0.43684004692999606, + "grad_norm": 6.815598347518612, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 433270124, + "step": 4468 + }, + { + "epoch": 0.43684004692999606, + "loss": 0.10535988956689835, + "loss_ce": 0.005147787742316723, + "loss_iou": 0.322265625, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 433270124, + "step": 4468 + }, + { + "epoch": 0.43693781775518187, + "grad_norm": 18.829764136507155, + "learning_rate": 5e-05, + "loss": 0.0977, + "num_input_tokens_seen": 433365464, + "step": 4469 + }, + { + "epoch": 0.43693781775518187, + "loss": 0.11150005459785461, + "loss_ce": 0.005497246980667114, + "loss_iou": 0.275390625, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 433365464, + "step": 4469 + }, + { + "epoch": 0.4370355885803676, + "grad_norm": 16.11177661657015, + "learning_rate": 5e-05, + "loss": 0.1212, + "num_input_tokens_seen": 433461892, + "step": 4470 + }, + { + "epoch": 0.4370355885803676, + "loss": 0.1407364159822464, + "loss_ce": 0.00911409966647625, + "loss_iou": 0.314453125, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 433461892, + "step": 4470 + }, + { + "epoch": 0.43713335940555337, + "grad_norm": 4.733004015725647, + "learning_rate": 5e-05, + "loss": 0.0689, + "num_input_tokens_seen": 433558940, + "step": 4471 + }, + { + "epoch": 0.43713335940555337, + "loss": 0.07555006444454193, + "loss_ce": 0.00333021255210042, + "loss_iou": 0.380859375, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 433558940, + "step": 4471 + }, + { + "epoch": 0.4372311302307392, + "grad_norm": 5.155944538640125, + "learning_rate": 5e-05, + "loss": 0.0908, + "num_input_tokens_seen": 433656540, + "step": 4472 + }, + { + "epoch": 0.4372311302307392, + "loss": 0.09373778104782104, + "loss_ce": 0.0025359969586133957, + "loss_iou": 0.337890625, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 433656540, + "step": 4472 + }, + { + "epoch": 0.4373289010559249, + "grad_norm": 8.979427120005255, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 433753700, + "step": 4473 + }, + { + "epoch": 0.4373289010559249, + "loss": 0.0925072655081749, + "loss_ce": 0.008904357440769672, + "loss_iou": 0.283203125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 433753700, + "step": 4473 + }, + { + "epoch": 0.4374266718811107, + "grad_norm": 8.016088230714695, + "learning_rate": 5e-05, + "loss": 0.0754, + "num_input_tokens_seen": 433850544, + "step": 4474 + }, + { + "epoch": 0.4374266718811107, + "loss": 0.09692458808422089, + "loss_ce": 0.0032356190495193005, + "loss_iou": 0.302734375, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 433850544, + "step": 4474 + }, + { + "epoch": 0.4375244427062964, + "grad_norm": 5.058321577810367, + "learning_rate": 5e-05, + "loss": 0.0893, + "num_input_tokens_seen": 433947384, + "step": 4475 + }, + { + "epoch": 0.4375244427062964, + "loss": 0.06976181268692017, + "loss_ce": 0.005369728431105614, + "loss_iou": 0.39453125, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 433947384, + "step": 4475 + }, + { + "epoch": 0.43762221353148223, + "grad_norm": 3.694565497236437, + "learning_rate": 5e-05, + "loss": 0.09, + "num_input_tokens_seen": 434044764, + "step": 4476 + }, + { + "epoch": 0.43762221353148223, + "loss": 0.05281709507107735, + "loss_ce": 0.0033938768319785595, + "loss_iou": 0.337890625, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 434044764, + "step": 4476 + }, + { + "epoch": 0.437719984356668, + "grad_norm": 3.3100926334244263, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 434141388, + "step": 4477 + }, + { + "epoch": 0.437719984356668, + "loss": 0.04999971762299538, + "loss_ce": 0.004452231340110302, + "loss_iou": 0.298828125, + "loss_num": 0.00909423828125, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 434141388, + "step": 4477 + }, + { + "epoch": 0.43781775518185373, + "grad_norm": 6.4592694749535475, + "learning_rate": 5e-05, + "loss": 0.0973, + "num_input_tokens_seen": 434238912, + "step": 4478 + }, + { + "epoch": 0.43781775518185373, + "loss": 0.0997927114367485, + "loss_ce": 0.007126089185476303, + "loss_iou": 0.26953125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 434238912, + "step": 4478 + }, + { + "epoch": 0.4379155260070395, + "grad_norm": 9.147581431840631, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 434335556, + "step": 4479 + }, + { + "epoch": 0.4379155260070395, + "loss": 0.06992773711681366, + "loss_ce": 0.009861513040959835, + "loss_iou": 0.2333984375, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 434335556, + "step": 4479 + }, + { + "epoch": 0.4380132968322253, + "grad_norm": 22.294344737522376, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 434433584, + "step": 4480 + }, + { + "epoch": 0.4380132968322253, + "loss": 0.08748818933963776, + "loss_ce": 0.0021305254194885492, + "loss_iou": 0.3125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 434433584, + "step": 4480 + }, + { + "epoch": 0.43811106765741104, + "grad_norm": 21.73242950429402, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 434531340, + "step": 4481 + }, + { + "epoch": 0.43811106765741104, + "loss": 0.08996306359767914, + "loss_ce": 0.005238635465502739, + "loss_iou": 0.3046875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 434531340, + "step": 4481 + }, + { + "epoch": 0.4382088384825968, + "grad_norm": 4.609331977106702, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 434627736, + "step": 4482 + }, + { + "epoch": 0.4382088384825968, + "loss": 0.07437905669212341, + "loss_ce": 0.0065384795889258385, + "loss_iou": 0.337890625, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 434627736, + "step": 4482 + }, + { + "epoch": 0.43830660930778254, + "grad_norm": 4.583220825554296, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 434724252, + "step": 4483 + }, + { + "epoch": 0.43830660930778254, + "loss": 0.07109048962593079, + "loss_ce": 0.004203589633107185, + "loss_iou": 0.328125, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 434724252, + "step": 4483 + }, + { + "epoch": 0.43840438013296834, + "grad_norm": 5.870183629169676, + "learning_rate": 5e-05, + "loss": 0.0932, + "num_input_tokens_seen": 434821368, + "step": 4484 + }, + { + "epoch": 0.43840438013296834, + "loss": 0.09354789555072784, + "loss_ce": 0.0076409075409173965, + "loss_iou": 0.314453125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 434821368, + "step": 4484 + }, + { + "epoch": 0.4385021509581541, + "grad_norm": 3.610133250906347, + "learning_rate": 5e-05, + "loss": 0.0872, + "num_input_tokens_seen": 434918556, + "step": 4485 + }, + { + "epoch": 0.4385021509581541, + "loss": 0.11430172622203827, + "loss_ce": 0.005460778716951609, + "loss_iou": 0.384765625, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 434918556, + "step": 4485 + }, + { + "epoch": 0.43859992178333984, + "grad_norm": 15.127435021998012, + "learning_rate": 5e-05, + "loss": 0.1173, + "num_input_tokens_seen": 435015844, + "step": 4486 + }, + { + "epoch": 0.43859992178333984, + "loss": 0.12129851430654526, + "loss_ce": 0.008063042536377907, + "loss_iou": 0.251953125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 435015844, + "step": 4486 + }, + { + "epoch": 0.4386976926085256, + "grad_norm": 15.13041883697943, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 435112388, + "step": 4487 + }, + { + "epoch": 0.4386976926085256, + "loss": 0.1063331589102745, + "loss_ce": 0.005247492343187332, + "loss_iou": 0.263671875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 435112388, + "step": 4487 + }, + { + "epoch": 0.4387954634337114, + "grad_norm": 5.615547385732356, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 435211088, + "step": 4488 + }, + { + "epoch": 0.4387954634337114, + "loss": 0.08136369287967682, + "loss_ce": 0.005985272116959095, + "loss_iou": 0.466796875, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 435211088, + "step": 4488 + }, + { + "epoch": 0.43889323425889715, + "grad_norm": 4.498407361347769, + "learning_rate": 5e-05, + "loss": 0.077, + "num_input_tokens_seen": 435308552, + "step": 4489 + }, + { + "epoch": 0.43889323425889715, + "loss": 0.08816787600517273, + "loss_ce": 0.005892487242817879, + "loss_iou": 0.30859375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 435308552, + "step": 4489 + }, + { + "epoch": 0.4389910050840829, + "grad_norm": 6.662686760907059, + "learning_rate": 5e-05, + "loss": 0.1324, + "num_input_tokens_seen": 435405412, + "step": 4490 + }, + { + "epoch": 0.4389910050840829, + "loss": 0.12216045707464218, + "loss_ce": 0.009565855376422405, + "loss_iou": 0.333984375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 435405412, + "step": 4490 + }, + { + "epoch": 0.43908877590926865, + "grad_norm": 21.386010771851442, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 435502336, + "step": 4491 + }, + { + "epoch": 0.43908877590926865, + "loss": 0.07780680805444717, + "loss_ce": 0.006670334376394749, + "loss_iou": 0.376953125, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 435502336, + "step": 4491 + }, + { + "epoch": 0.43918654673445445, + "grad_norm": 18.94707731814266, + "learning_rate": 5e-05, + "loss": 0.0729, + "num_input_tokens_seen": 435599868, + "step": 4492 + }, + { + "epoch": 0.43918654673445445, + "loss": 0.046835847198963165, + "loss_ce": 0.008753722533583641, + "loss_iou": 0.330078125, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 435599868, + "step": 4492 + }, + { + "epoch": 0.4392843175596402, + "grad_norm": 7.313416306486843, + "learning_rate": 5e-05, + "loss": 0.0726, + "num_input_tokens_seen": 435696200, + "step": 4493 + }, + { + "epoch": 0.4392843175596402, + "loss": 0.08307886123657227, + "loss_ce": 0.009439944289624691, + "loss_iou": 0.359375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 435696200, + "step": 4493 + }, + { + "epoch": 0.43938208838482595, + "grad_norm": 14.794165436283526, + "learning_rate": 5e-05, + "loss": 0.1087, + "num_input_tokens_seen": 435793108, + "step": 4494 + }, + { + "epoch": 0.43938208838482595, + "loss": 0.12491365522146225, + "loss_ce": 0.003209553426131606, + "loss_iou": 0.28515625, + "loss_num": 0.0244140625, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 435793108, + "step": 4494 + }, + { + "epoch": 0.43947985921001176, + "grad_norm": 4.565517181160467, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 435889968, + "step": 4495 + }, + { + "epoch": 0.43947985921001176, + "loss": 0.08557188510894775, + "loss_ce": 0.00614988012239337, + "loss_iou": 0.3515625, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 435889968, + "step": 4495 + }, + { + "epoch": 0.4395776300351975, + "grad_norm": 3.7978436127124953, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 435987056, + "step": 4496 + }, + { + "epoch": 0.4395776300351975, + "loss": 0.0451510027050972, + "loss_ce": 0.004852541722357273, + "loss_iou": 0.28125, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 435987056, + "step": 4496 + }, + { + "epoch": 0.43967540086038326, + "grad_norm": 5.392589294034898, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 436084656, + "step": 4497 + }, + { + "epoch": 0.43967540086038326, + "loss": 0.06845897436141968, + "loss_ce": 0.003799860831350088, + "loss_iou": 0.2255859375, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 436084656, + "step": 4497 + }, + { + "epoch": 0.439773171685569, + "grad_norm": 9.653365473971327, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 436181424, + "step": 4498 + }, + { + "epoch": 0.439773171685569, + "loss": 0.05667315050959587, + "loss_ce": 0.002687552012503147, + "loss_iou": 0.2294921875, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 436181424, + "step": 4498 + }, + { + "epoch": 0.4398709425107548, + "grad_norm": 7.534179397313113, + "learning_rate": 5e-05, + "loss": 0.1157, + "num_input_tokens_seen": 436278252, + "step": 4499 + }, + { + "epoch": 0.4398709425107548, + "loss": 0.12024350464344025, + "loss_ce": 0.011341528967022896, + "loss_iou": 0.384765625, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 436278252, + "step": 4499 + }, + { + "epoch": 0.43996871333594056, + "grad_norm": 4.411912816395655, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 436375116, + "step": 4500 + }, + { + "epoch": 0.43996871333594056, + "eval_seeclick_CIoU": 0.5483672916889191, + "eval_seeclick_GIoU": 0.5531973540782928, + "eval_seeclick_IoU": 0.5886878371238708, + "eval_seeclick_MAE_all": 0.0735628791153431, + "eval_seeclick_MAE_h": 0.03182427957653999, + "eval_seeclick_MAE_w": 0.12206161022186279, + "eval_seeclick_MAE_x": 0.11046116426587105, + "eval_seeclick_MAE_y": 0.029904447495937347, + "eval_seeclick_NUM_probability": 0.9999946057796478, + "eval_seeclick_inside_bbox": 0.7997159063816071, + "eval_seeclick_loss": 0.27298232913017273, + "eval_seeclick_loss_ce": 0.010127364192157984, + "eval_seeclick_loss_iou": 0.4383544921875, + "eval_seeclick_loss_num": 0.054168701171875, + "eval_seeclick_loss_xval": 0.270965576171875, + "eval_seeclick_runtime": 98.9008, + "eval_seeclick_samples_per_second": 0.435, + "eval_seeclick_steps_per_second": 0.02, + "num_input_tokens_seen": 436375116, + "step": 4500 + }, + { + "epoch": 0.43996871333594056, + "eval_icons_CIoU": 0.6296269297599792, + "eval_icons_GIoU": 0.6286346912384033, + "eval_icons_IoU": 0.6640613973140717, + "eval_icons_MAE_all": 0.0685550644993782, + "eval_icons_MAE_h": 0.07993324846029282, + "eval_icons_MAE_w": 0.05793263576924801, + "eval_icons_MAE_x": 0.05894321948289871, + "eval_icons_MAE_y": 0.07741114869713783, + "eval_icons_NUM_probability": 0.9999931454658508, + "eval_icons_inside_bbox": 0.8194444477558136, + "eval_icons_loss": 0.21014027297496796, + "eval_icons_loss_ce": 3.4877015195888816e-06, + "eval_icons_loss_iou": 0.34619140625, + "eval_icons_loss_num": 0.04311180114746094, + "eval_icons_loss_xval": 0.2156219482421875, + "eval_icons_runtime": 87.3081, + "eval_icons_samples_per_second": 0.573, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 436375116, + "step": 4500 + }, + { + "epoch": 0.43996871333594056, + "eval_screenspot_CIoU": 0.28630157311757404, + "eval_screenspot_GIoU": 0.26266111185153324, + "eval_screenspot_IoU": 0.38229315479596454, + "eval_screenspot_MAE_all": 0.15944962203502655, + "eval_screenspot_MAE_h": 0.11705680688222249, + "eval_screenspot_MAE_w": 0.21146256724993387, + "eval_screenspot_MAE_x": 0.1913601209719976, + "eval_screenspot_MAE_y": 0.11791898061831792, + "eval_screenspot_NUM_probability": 0.9999888936678568, + "eval_screenspot_inside_bbox": 0.6254166762034098, + "eval_screenspot_loss": 0.5670341849327087, + "eval_screenspot_loss_ce": 0.020321466339131195, + "eval_screenspot_loss_iou": 0.3517252604166667, + "eval_screenspot_loss_num": 0.10993448893229167, + "eval_screenspot_loss_xval": 0.5492350260416666, + "eval_screenspot_runtime": 153.2509, + "eval_screenspot_samples_per_second": 0.581, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 436375116, + "step": 4500 + }, + { + "epoch": 0.43996871333594056, + "eval_compot_CIoU": 0.41727426648139954, + "eval_compot_GIoU": 0.40849290788173676, + "eval_compot_IoU": 0.4897070676088333, + "eval_compot_MAE_all": 0.10452596470713615, + "eval_compot_MAE_h": 0.08314887806773186, + "eval_compot_MAE_w": 0.125019121915102, + "eval_compot_MAE_x": 0.12403874099254608, + "eval_compot_MAE_y": 0.08589714765548706, + "eval_compot_NUM_probability": 0.9999712407588959, + "eval_compot_inside_bbox": 0.6649305522441864, + "eval_compot_loss": 0.3094131648540497, + "eval_compot_loss_ce": 0.013933070003986359, + "eval_compot_loss_iou": 0.44024658203125, + "eval_compot_loss_num": 0.0509185791015625, + "eval_compot_loss_xval": 0.2547607421875, + "eval_compot_runtime": 89.7814, + "eval_compot_samples_per_second": 0.557, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 436375116, + "step": 4500 + }, + { + "epoch": 0.43996871333594056, + "loss": 0.2571384310722351, + "loss_ce": 0.013302953913807869, + "loss_iou": 0.48046875, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 436375116, + "step": 4500 + }, + { + "epoch": 0.4400664841611263, + "grad_norm": 10.52290905815234, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 436472056, + "step": 4501 + }, + { + "epoch": 0.4400664841611263, + "loss": 0.08634952455759048, + "loss_ce": 0.0068817525170743465, + "loss_iou": 0.357421875, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 436472056, + "step": 4501 + }, + { + "epoch": 0.44016425498631206, + "grad_norm": 4.946450916071531, + "learning_rate": 5e-05, + "loss": 0.1028, + "num_input_tokens_seen": 436568604, + "step": 4502 + }, + { + "epoch": 0.44016425498631206, + "loss": 0.07198413461446762, + "loss_ce": 0.005471075419336557, + "loss_iou": 0.283203125, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 436568604, + "step": 4502 + }, + { + "epoch": 0.44026202581149787, + "grad_norm": 9.024593129463042, + "learning_rate": 5e-05, + "loss": 0.1077, + "num_input_tokens_seen": 436666448, + "step": 4503 + }, + { + "epoch": 0.44026202581149787, + "loss": 0.12276201695203781, + "loss_ce": 0.007191949058324099, + "loss_iou": 0.361328125, + "loss_num": 0.0230712890625, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 436666448, + "step": 4503 + }, + { + "epoch": 0.4403597966366836, + "grad_norm": 24.879468179687425, + "learning_rate": 5e-05, + "loss": 0.0478, + "num_input_tokens_seen": 436762796, + "step": 4504 + }, + { + "epoch": 0.4403597966366836, + "loss": 0.04164258390665054, + "loss_ce": 0.003686344949528575, + "loss_iou": 0.259765625, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 436762796, + "step": 4504 + }, + { + "epoch": 0.44045756746186937, + "grad_norm": 9.638305007054544, + "learning_rate": 5e-05, + "loss": 0.124, + "num_input_tokens_seen": 436859748, + "step": 4505 + }, + { + "epoch": 0.44045756746186937, + "loss": 0.08764094859361649, + "loss_ce": 0.0030767356511205435, + "loss_iou": 0.3515625, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 436859748, + "step": 4505 + }, + { + "epoch": 0.4405553382870551, + "grad_norm": 8.62146516877826, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 436956892, + "step": 4506 + }, + { + "epoch": 0.4405553382870551, + "loss": 0.06146255135536194, + "loss_ce": 0.003997953608632088, + "loss_iou": 0.294921875, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 436956892, + "step": 4506 + }, + { + "epoch": 0.4406531091122409, + "grad_norm": 3.4467728605141077, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 437054136, + "step": 4507 + }, + { + "epoch": 0.4406531091122409, + "loss": 0.044493407011032104, + "loss_ce": 0.00680419709533453, + "loss_iou": 0.265625, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 437054136, + "step": 4507 + }, + { + "epoch": 0.4407508799374267, + "grad_norm": 5.839729907502369, + "learning_rate": 5e-05, + "loss": 0.0374, + "num_input_tokens_seen": 437150832, + "step": 4508 + }, + { + "epoch": 0.4407508799374267, + "loss": 0.041227880865335464, + "loss_ce": 0.004118505399674177, + "loss_iou": 0.166015625, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 437150832, + "step": 4508 + }, + { + "epoch": 0.4408486507626124, + "grad_norm": 6.217532761359252, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 437248596, + "step": 4509 + }, + { + "epoch": 0.4408486507626124, + "loss": 0.0693267360329628, + "loss_ce": 0.002371168229728937, + "loss_iou": 0.423828125, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 437248596, + "step": 4509 + }, + { + "epoch": 0.4409464215877982, + "grad_norm": 20.80271444637035, + "learning_rate": 5e-05, + "loss": 0.0726, + "num_input_tokens_seen": 437346660, + "step": 4510 + }, + { + "epoch": 0.4409464215877982, + "loss": 0.08125866949558258, + "loss_ce": 0.007009402383118868, + "loss_iou": 0.30859375, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 437346660, + "step": 4510 + }, + { + "epoch": 0.441044192412984, + "grad_norm": 27.866381605409238, + "learning_rate": 5e-05, + "loss": 0.0919, + "num_input_tokens_seen": 437443360, + "step": 4511 + }, + { + "epoch": 0.441044192412984, + "loss": 0.10260864347219467, + "loss_ce": 0.007515868172049522, + "loss_iou": 0.322265625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 437443360, + "step": 4511 + }, + { + "epoch": 0.44114196323816973, + "grad_norm": 12.765985534513346, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 437539672, + "step": 4512 + }, + { + "epoch": 0.44114196323816973, + "loss": 0.05564567819237709, + "loss_ce": 0.001995774917304516, + "loss_iou": 0.1875, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 437539672, + "step": 4512 + }, + { + "epoch": 0.4412397340633555, + "grad_norm": 8.733487935499618, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 437636556, + "step": 4513 + }, + { + "epoch": 0.4412397340633555, + "loss": 0.07995826005935669, + "loss_ce": 0.006380378268659115, + "loss_iou": 0.220703125, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 437636556, + "step": 4513 + }, + { + "epoch": 0.44133750488854123, + "grad_norm": 9.056215949287573, + "learning_rate": 5e-05, + "loss": 0.12, + "num_input_tokens_seen": 437733628, + "step": 4514 + }, + { + "epoch": 0.44133750488854123, + "loss": 0.12355522066354752, + "loss_ce": 0.009083788841962814, + "loss_iou": 0.35546875, + "loss_num": 0.02294921875, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 437733628, + "step": 4514 + }, + { + "epoch": 0.44143527571372704, + "grad_norm": 3.8498557764306263, + "learning_rate": 5e-05, + "loss": 0.0951, + "num_input_tokens_seen": 437830600, + "step": 4515 + }, + { + "epoch": 0.44143527571372704, + "loss": 0.1272769272327423, + "loss_ce": 0.003245864063501358, + "loss_iou": 0.287109375, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 437830600, + "step": 4515 + }, + { + "epoch": 0.4415330465389128, + "grad_norm": 12.085840289079265, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 437928268, + "step": 4516 + }, + { + "epoch": 0.4415330465389128, + "loss": 0.0852493867278099, + "loss_ce": 0.003706413321197033, + "loss_iou": 0.322265625, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 437928268, + "step": 4516 + }, + { + "epoch": 0.44163081736409854, + "grad_norm": 8.186920335049596, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 438024976, + "step": 4517 + }, + { + "epoch": 0.44163081736409854, + "loss": 0.0759144127368927, + "loss_ce": 0.008909258060157299, + "loss_iou": 0.2451171875, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 438024976, + "step": 4517 + }, + { + "epoch": 0.44172858818928434, + "grad_norm": 17.166493157824465, + "learning_rate": 5e-05, + "loss": 0.0988, + "num_input_tokens_seen": 438121708, + "step": 4518 + }, + { + "epoch": 0.44172858818928434, + "loss": 0.08020909875631332, + "loss_ce": 0.006310785189270973, + "loss_iou": 0.296875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 438121708, + "step": 4518 + }, + { + "epoch": 0.4418263590144701, + "grad_norm": 7.33800659056582, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 438217076, + "step": 4519 + }, + { + "epoch": 0.4418263590144701, + "loss": 0.06390373408794403, + "loss_ce": 0.009155191481113434, + "loss_iou": 0.173828125, + "loss_num": 0.010986328125, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 438217076, + "step": 4519 + }, + { + "epoch": 0.44192412983965584, + "grad_norm": 9.729342184270969, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 438313940, + "step": 4520 + }, + { + "epoch": 0.44192412983965584, + "loss": 0.08735276758670807, + "loss_ce": 0.004299177788197994, + "loss_iou": 0.310546875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 438313940, + "step": 4520 + }, + { + "epoch": 0.4420219006648416, + "grad_norm": 13.386865371311258, + "learning_rate": 5e-05, + "loss": 0.0954, + "num_input_tokens_seen": 438411652, + "step": 4521 + }, + { + "epoch": 0.4420219006648416, + "loss": 0.12865370512008667, + "loss_ce": 0.006461314857006073, + "loss_iou": 0.486328125, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 438411652, + "step": 4521 + }, + { + "epoch": 0.4421196714900274, + "grad_norm": 8.768945771594401, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 438509364, + "step": 4522 + }, + { + "epoch": 0.4421196714900274, + "loss": 0.10780597478151321, + "loss_ce": 0.004687073174864054, + "loss_iou": 0.3046875, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 438509364, + "step": 4522 + }, + { + "epoch": 0.44221744231521315, + "grad_norm": 8.730868529202562, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 438605640, + "step": 4523 + }, + { + "epoch": 0.44221744231521315, + "loss": 0.08188600838184357, + "loss_ce": 0.00661440147086978, + "loss_iou": 0.298828125, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 438605640, + "step": 4523 + }, + { + "epoch": 0.4423152131403989, + "grad_norm": 3.0113842930606927, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 438702372, + "step": 4524 + }, + { + "epoch": 0.4423152131403989, + "loss": 0.07813245058059692, + "loss_ce": 0.0055921683087944984, + "loss_iou": 0.2021484375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 438702372, + "step": 4524 + }, + { + "epoch": 0.44241298396558465, + "grad_norm": 7.606893861383662, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 438799204, + "step": 4525 + }, + { + "epoch": 0.44241298396558465, + "loss": 0.05352616310119629, + "loss_ce": 0.0026991351041942835, + "loss_iou": 0.2265625, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 438799204, + "step": 4525 + }, + { + "epoch": 0.44251075479077046, + "grad_norm": 5.30429722315778, + "learning_rate": 5e-05, + "loss": 0.0407, + "num_input_tokens_seen": 438895036, + "step": 4526 + }, + { + "epoch": 0.44251075479077046, + "loss": 0.037668608129024506, + "loss_ce": 0.004862209782004356, + "loss_iou": 0.28515625, + "loss_num": 0.006561279296875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 438895036, + "step": 4526 + }, + { + "epoch": 0.4426085256159562, + "grad_norm": 39.62557080070392, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 438991672, + "step": 4527 + }, + { + "epoch": 0.4426085256159562, + "loss": 0.08083085715770721, + "loss_ce": 0.005445762537419796, + "loss_iou": 0.265625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 438991672, + "step": 4527 + }, + { + "epoch": 0.44270629644114196, + "grad_norm": 19.79076899906944, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 439089080, + "step": 4528 + }, + { + "epoch": 0.44270629644114196, + "loss": 0.10023097693920135, + "loss_ce": 0.00776271428912878, + "loss_iou": 0.361328125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 439089080, + "step": 4528 + }, + { + "epoch": 0.4428040672663277, + "grad_norm": 5.473280380695561, + "learning_rate": 5e-05, + "loss": 0.0967, + "num_input_tokens_seen": 439185524, + "step": 4529 + }, + { + "epoch": 0.4428040672663277, + "loss": 0.09740766882896423, + "loss_ce": 0.009654366411268711, + "loss_iou": 0.2392578125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 439185524, + "step": 4529 + }, + { + "epoch": 0.4429018380915135, + "grad_norm": 19.92325641128952, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 439282848, + "step": 4530 + }, + { + "epoch": 0.4429018380915135, + "loss": 0.06949868053197861, + "loss_ce": 0.0049997782334685326, + "loss_iou": 0.349609375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 439282848, + "step": 4530 + }, + { + "epoch": 0.44299960891669926, + "grad_norm": 8.087728473241286, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 439379036, + "step": 4531 + }, + { + "epoch": 0.44299960891669926, + "loss": 0.07427042722702026, + "loss_ce": 0.003456295235082507, + "loss_iou": 0.2734375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 439379036, + "step": 4531 + }, + { + "epoch": 0.443097379741885, + "grad_norm": 6.494609526186225, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 439475584, + "step": 4532 + }, + { + "epoch": 0.443097379741885, + "loss": 0.06440044939517975, + "loss_ce": 0.006035580765455961, + "loss_iou": 0.3671875, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 439475584, + "step": 4532 + }, + { + "epoch": 0.44319515056707076, + "grad_norm": 6.059439618630668, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 439572808, + "step": 4533 + }, + { + "epoch": 0.44319515056707076, + "loss": 0.0606459379196167, + "loss_ce": 0.005844001658260822, + "loss_iou": 0.333984375, + "loss_num": 0.010986328125, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 439572808, + "step": 4533 + }, + { + "epoch": 0.44329292139225657, + "grad_norm": 18.51764360098638, + "learning_rate": 5e-05, + "loss": 0.1211, + "num_input_tokens_seen": 439669344, + "step": 4534 + }, + { + "epoch": 0.44329292139225657, + "loss": 0.10198501497507095, + "loss_ce": 0.0034895329736173153, + "loss_iou": 0.357421875, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 439669344, + "step": 4534 + }, + { + "epoch": 0.4433906922174423, + "grad_norm": 83.38486469424639, + "learning_rate": 5e-05, + "loss": 0.0925, + "num_input_tokens_seen": 439766860, + "step": 4535 + }, + { + "epoch": 0.4433906922174423, + "loss": 0.09443046152591705, + "loss_ce": 0.004403606057167053, + "loss_iou": 0.4375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 439766860, + "step": 4535 + }, + { + "epoch": 0.44348846304262807, + "grad_norm": 17.404901657900314, + "learning_rate": 5e-05, + "loss": 0.1105, + "num_input_tokens_seen": 439863160, + "step": 4536 + }, + { + "epoch": 0.44348846304262807, + "loss": 0.11814610660076141, + "loss_ce": 0.008870286867022514, + "loss_iou": 0.384765625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 439863160, + "step": 4536 + }, + { + "epoch": 0.4435862338678138, + "grad_norm": 4.940807694447091, + "learning_rate": 5e-05, + "loss": 0.1096, + "num_input_tokens_seen": 439959912, + "step": 4537 + }, + { + "epoch": 0.4435862338678138, + "loss": 0.10088660567998886, + "loss_ce": 0.00631263293325901, + "loss_iou": 0.419921875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 439959912, + "step": 4537 + }, + { + "epoch": 0.4436840046929996, + "grad_norm": 11.367870312764811, + "learning_rate": 5e-05, + "loss": 0.0867, + "num_input_tokens_seen": 440056464, + "step": 4538 + }, + { + "epoch": 0.4436840046929996, + "loss": 0.08755314350128174, + "loss_ce": 0.00581180676817894, + "loss_iou": 0.314453125, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 440056464, + "step": 4538 + }, + { + "epoch": 0.4437817755181854, + "grad_norm": 2.7490347123278958, + "learning_rate": 5e-05, + "loss": 0.0992, + "num_input_tokens_seen": 440152344, + "step": 4539 + }, + { + "epoch": 0.4437817755181854, + "loss": 0.12789961695671082, + "loss_ce": 0.007599320728331804, + "loss_iou": 0.34765625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 440152344, + "step": 4539 + }, + { + "epoch": 0.4438795463433711, + "grad_norm": 3.5591197836466506, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 440249600, + "step": 4540 + }, + { + "epoch": 0.4438795463433711, + "loss": 0.0837002620100975, + "loss_ce": 0.0064450157806277275, + "loss_iou": 0.34765625, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 440249600, + "step": 4540 + }, + { + "epoch": 0.44397731716855693, + "grad_norm": 5.281779744877294, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 440346752, + "step": 4541 + }, + { + "epoch": 0.44397731716855693, + "loss": 0.09065764397382736, + "loss_ce": 0.004262384492903948, + "loss_iou": 0.337890625, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 440346752, + "step": 4541 + }, + { + "epoch": 0.4440750879937427, + "grad_norm": 7.1908347479286014, + "learning_rate": 5e-05, + "loss": 0.0845, + "num_input_tokens_seen": 440442716, + "step": 4542 + }, + { + "epoch": 0.4440750879937427, + "loss": 0.0796997994184494, + "loss_ce": 0.0025666221044957638, + "loss_iou": 0.279296875, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 440442716, + "step": 4542 + }, + { + "epoch": 0.44417285881892843, + "grad_norm": 7.863434518181885, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 440539808, + "step": 4543 + }, + { + "epoch": 0.44417285881892843, + "loss": 0.10039862245321274, + "loss_ce": 0.006335815414786339, + "loss_iou": 0.34375, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 440539808, + "step": 4543 + }, + { + "epoch": 0.4442706296441142, + "grad_norm": 12.000432438631309, + "learning_rate": 5e-05, + "loss": 0.0967, + "num_input_tokens_seen": 440636500, + "step": 4544 + }, + { + "epoch": 0.4442706296441142, + "loss": 0.10120454430580139, + "loss_ce": 0.0067526353523135185, + "loss_iou": 0.2373046875, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 440636500, + "step": 4544 + }, + { + "epoch": 0.4443684004693, + "grad_norm": 17.413453459708713, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 440733640, + "step": 4545 + }, + { + "epoch": 0.4443684004693, + "loss": 0.06654693186283112, + "loss_ce": 0.002461921190842986, + "loss_iou": 0.1962890625, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 440733640, + "step": 4545 + }, + { + "epoch": 0.44446617129448573, + "grad_norm": 8.988928207405062, + "learning_rate": 5e-05, + "loss": 0.0949, + "num_input_tokens_seen": 440830512, + "step": 4546 + }, + { + "epoch": 0.44446617129448573, + "loss": 0.08650977909564972, + "loss_ce": 0.0038681752048432827, + "loss_iou": 0.404296875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 440830512, + "step": 4546 + }, + { + "epoch": 0.4445639421196715, + "grad_norm": 15.730002083561551, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 440927056, + "step": 4547 + }, + { + "epoch": 0.4445639421196715, + "loss": 0.05533783137798309, + "loss_ce": 0.0034121759235858917, + "loss_iou": 0.31640625, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 440927056, + "step": 4547 + }, + { + "epoch": 0.44466171294485723, + "grad_norm": 17.440623328514864, + "learning_rate": 5e-05, + "loss": 0.102, + "num_input_tokens_seen": 441024124, + "step": 4548 + }, + { + "epoch": 0.44466171294485723, + "loss": 0.08159752190113068, + "loss_ce": 0.005974959582090378, + "loss_iou": 0.2890625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 441024124, + "step": 4548 + }, + { + "epoch": 0.44475948377004304, + "grad_norm": 9.588025728812635, + "learning_rate": 5e-05, + "loss": 0.1231, + "num_input_tokens_seen": 441120904, + "step": 4549 + }, + { + "epoch": 0.44475948377004304, + "loss": 0.13303591310977936, + "loss_ce": 0.003710041521117091, + "loss_iou": 0.30859375, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 441120904, + "step": 4549 + }, + { + "epoch": 0.4448572545952288, + "grad_norm": 7.6131726547248215, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 441217700, + "step": 4550 + }, + { + "epoch": 0.4448572545952288, + "loss": 0.063287153840065, + "loss_ce": 0.003198040649294853, + "loss_iou": 0.203125, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 441217700, + "step": 4550 + }, + { + "epoch": 0.44495502542041454, + "grad_norm": 8.636655477388883, + "learning_rate": 5e-05, + "loss": 0.0967, + "num_input_tokens_seen": 441315168, + "step": 4551 + }, + { + "epoch": 0.44495502542041454, + "loss": 0.0946093201637268, + "loss_ce": 0.005101257003843784, + "loss_iou": 0.302734375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 441315168, + "step": 4551 + }, + { + "epoch": 0.4450527962456003, + "grad_norm": 13.591600531463047, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 441412048, + "step": 4552 + }, + { + "epoch": 0.4450527962456003, + "loss": 0.0657944530248642, + "loss_ce": 0.006041041575372219, + "loss_iou": 0.318359375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 441412048, + "step": 4552 + }, + { + "epoch": 0.4451505670707861, + "grad_norm": 8.078301525245267, + "learning_rate": 5e-05, + "loss": 0.0908, + "num_input_tokens_seen": 441509648, + "step": 4553 + }, + { + "epoch": 0.4451505670707861, + "loss": 0.08202505111694336, + "loss_ce": 0.0056090266443789005, + "loss_iou": 0.359375, + "loss_num": 0.01531982421875, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 441509648, + "step": 4553 + }, + { + "epoch": 0.44524833789597185, + "grad_norm": 12.766247501304512, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 441607388, + "step": 4554 + }, + { + "epoch": 0.44524833789597185, + "loss": 0.08734406530857086, + "loss_ce": 0.002962960395962, + "loss_iou": 0.400390625, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 441607388, + "step": 4554 + }, + { + "epoch": 0.4453461087211576, + "grad_norm": 7.500303161828164, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 441706252, + "step": 4555 + }, + { + "epoch": 0.4453461087211576, + "loss": 0.06557642668485641, + "loss_ce": 0.0017756156157702208, + "loss_iou": 0.357421875, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 441706252, + "step": 4555 + }, + { + "epoch": 0.44544387954634335, + "grad_norm": 17.38633086773549, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 441803132, + "step": 4556 + }, + { + "epoch": 0.44544387954634335, + "loss": 0.06514669954776764, + "loss_ce": 0.005149145144969225, + "loss_iou": 0.275390625, + "loss_num": 0.011962890625, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 441803132, + "step": 4556 + }, + { + "epoch": 0.44554165037152915, + "grad_norm": 8.874493019042529, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 441900200, + "step": 4557 + }, + { + "epoch": 0.44554165037152915, + "loss": 0.09780863672494888, + "loss_ce": 0.007018843665719032, + "loss_iou": 0.318359375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 441900200, + "step": 4557 + }, + { + "epoch": 0.4456394211967149, + "grad_norm": 3.8576727436211877, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 441995468, + "step": 4558 + }, + { + "epoch": 0.4456394211967149, + "loss": 0.08913920819759369, + "loss_ce": 0.006055101752281189, + "loss_iou": 0.26953125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 441995468, + "step": 4558 + }, + { + "epoch": 0.44573719202190065, + "grad_norm": 8.576135544739099, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 442092672, + "step": 4559 + }, + { + "epoch": 0.44573719202190065, + "loss": 0.09777906537055969, + "loss_ce": 0.004380012862384319, + "loss_iou": 0.453125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 442092672, + "step": 4559 + }, + { + "epoch": 0.4458349628470864, + "grad_norm": 13.445900293007345, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 442190000, + "step": 4560 + }, + { + "epoch": 0.4458349628470864, + "loss": 0.06436717510223389, + "loss_ce": 0.00419414509087801, + "loss_iou": 0.353515625, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 442190000, + "step": 4560 + }, + { + "epoch": 0.4459327336722722, + "grad_norm": 3.967436084801747, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 442287740, + "step": 4561 + }, + { + "epoch": 0.4459327336722722, + "loss": 0.06291978061199188, + "loss_ce": 0.0063096689991652966, + "loss_iou": 0.283203125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 442287740, + "step": 4561 + }, + { + "epoch": 0.44603050449745796, + "grad_norm": 4.502828778448931, + "learning_rate": 5e-05, + "loss": 0.1059, + "num_input_tokens_seen": 442385844, + "step": 4562 + }, + { + "epoch": 0.44603050449745796, + "loss": 0.12949968874454498, + "loss_ce": 0.008398301899433136, + "loss_iou": 0.306640625, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 442385844, + "step": 4562 + }, + { + "epoch": 0.4461282753226437, + "grad_norm": 13.367260759674867, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 442483692, + "step": 4563 + }, + { + "epoch": 0.4461282753226437, + "loss": 0.08358398079872131, + "loss_ce": 0.008045343682169914, + "loss_iou": 0.294921875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 442483692, + "step": 4563 + }, + { + "epoch": 0.4462260461478295, + "grad_norm": 7.2654166765834125, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 442581448, + "step": 4564 + }, + { + "epoch": 0.4462260461478295, + "loss": 0.06555984914302826, + "loss_ce": 0.003418428124859929, + "loss_iou": 0.33984375, + "loss_num": 0.01239013671875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 442581448, + "step": 4564 + }, + { + "epoch": 0.44632381697301526, + "grad_norm": 4.9248252820877525, + "learning_rate": 5e-05, + "loss": 0.0847, + "num_input_tokens_seen": 442678060, + "step": 4565 + }, + { + "epoch": 0.44632381697301526, + "loss": 0.09288039058446884, + "loss_ce": 0.009220264852046967, + "loss_iou": 0.28515625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 442678060, + "step": 4565 + }, + { + "epoch": 0.446421587798201, + "grad_norm": 4.363990092412221, + "learning_rate": 5e-05, + "loss": 0.0595, + "num_input_tokens_seen": 442775024, + "step": 4566 + }, + { + "epoch": 0.446421587798201, + "loss": 0.07273974269628525, + "loss_ce": 0.007218502927571535, + "loss_iou": 0.310546875, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 442775024, + "step": 4566 + }, + { + "epoch": 0.44651935862338676, + "grad_norm": 5.906895274622356, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 442871984, + "step": 4567 + }, + { + "epoch": 0.44651935862338676, + "loss": 0.09831008315086365, + "loss_ce": 0.003705594688653946, + "loss_iou": 0.34765625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 442871984, + "step": 4567 + }, + { + "epoch": 0.44661712944857257, + "grad_norm": 15.713070433153183, + "learning_rate": 5e-05, + "loss": 0.0896, + "num_input_tokens_seen": 442969884, + "step": 4568 + }, + { + "epoch": 0.44661712944857257, + "loss": 0.12240873277187347, + "loss_ce": 0.005831580609083176, + "loss_iou": 0.3984375, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 442969884, + "step": 4568 + }, + { + "epoch": 0.4467149002737583, + "grad_norm": 17.852640002308412, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 443066416, + "step": 4569 + }, + { + "epoch": 0.4467149002737583, + "loss": 0.10009706020355225, + "loss_ce": 0.005248422734439373, + "loss_iou": 0.2451171875, + "loss_num": 0.01904296875, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 443066416, + "step": 4569 + }, + { + "epoch": 0.44681267109894407, + "grad_norm": 3.3980403060720397, + "learning_rate": 5e-05, + "loss": 0.0817, + "num_input_tokens_seen": 443163588, + "step": 4570 + }, + { + "epoch": 0.44681267109894407, + "loss": 0.07176721841096878, + "loss_ce": 0.00812280923128128, + "loss_iou": 0.287109375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 443163588, + "step": 4570 + }, + { + "epoch": 0.4469104419241298, + "grad_norm": 11.285025951014024, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 443260252, + "step": 4571 + }, + { + "epoch": 0.4469104419241298, + "loss": 0.11383748054504395, + "loss_ce": 0.003504987573251128, + "loss_iou": 0.21484375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 443260252, + "step": 4571 + }, + { + "epoch": 0.4470082127493156, + "grad_norm": 20.77658956443006, + "learning_rate": 5e-05, + "loss": 0.1203, + "num_input_tokens_seen": 443357060, + "step": 4572 + }, + { + "epoch": 0.4470082127493156, + "loss": 0.13712528347969055, + "loss_ce": 0.013498585671186447, + "loss_iou": 0.3125, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 443357060, + "step": 4572 + }, + { + "epoch": 0.4471059835745014, + "grad_norm": 14.15822615665013, + "learning_rate": 5e-05, + "loss": 0.106, + "num_input_tokens_seen": 443454332, + "step": 4573 + }, + { + "epoch": 0.4471059835745014, + "loss": 0.14163248240947723, + "loss_ce": 0.008743686601519585, + "loss_iou": 0.2734375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 443454332, + "step": 4573 + }, + { + "epoch": 0.4472037543996871, + "grad_norm": 16.09537475659704, + "learning_rate": 5e-05, + "loss": 0.0968, + "num_input_tokens_seen": 443551480, + "step": 4574 + }, + { + "epoch": 0.4472037543996871, + "loss": 0.08507777750492096, + "loss_ce": 0.00431300513446331, + "loss_iou": 0.2890625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 443551480, + "step": 4574 + }, + { + "epoch": 0.4473015252248729, + "grad_norm": 18.04008645601499, + "learning_rate": 5e-05, + "loss": 0.0847, + "num_input_tokens_seen": 443648244, + "step": 4575 + }, + { + "epoch": 0.4473015252248729, + "loss": 0.06266922503709793, + "loss_ce": 0.0010084544774144888, + "loss_iou": 0.365234375, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 443648244, + "step": 4575 + }, + { + "epoch": 0.4473992960500587, + "grad_norm": 35.58474841205594, + "learning_rate": 5e-05, + "loss": 0.1135, + "num_input_tokens_seen": 443745472, + "step": 4576 + }, + { + "epoch": 0.4473992960500587, + "loss": 0.1021537259221077, + "loss_ce": 0.003032631240785122, + "loss_iou": 0.435546875, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 443745472, + "step": 4576 + }, + { + "epoch": 0.44749706687524443, + "grad_norm": 12.462634056591924, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 443842068, + "step": 4577 + }, + { + "epoch": 0.44749706687524443, + "loss": 0.07989415526390076, + "loss_ce": 0.006468862295150757, + "loss_iou": 0.33984375, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 443842068, + "step": 4577 + }, + { + "epoch": 0.4475948377004302, + "grad_norm": 4.180803695624847, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 443939348, + "step": 4578 + }, + { + "epoch": 0.4475948377004302, + "loss": 0.06062048301100731, + "loss_ce": 0.004101927392184734, + "loss_iou": 0.341796875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 443939348, + "step": 4578 + }, + { + "epoch": 0.44769260852561593, + "grad_norm": 2.2867843478892684, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 444035904, + "step": 4579 + }, + { + "epoch": 0.44769260852561593, + "loss": 0.06445921212434769, + "loss_ce": 0.007437117397785187, + "loss_iou": 0.287109375, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 444035904, + "step": 4579 + }, + { + "epoch": 0.44779037935080174, + "grad_norm": 9.845756597138768, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 444133548, + "step": 4580 + }, + { + "epoch": 0.44779037935080174, + "loss": 0.09236340969800949, + "loss_ce": 0.00944715179502964, + "loss_iou": 0.283203125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 444133548, + "step": 4580 + }, + { + "epoch": 0.4478881501759875, + "grad_norm": 9.627599425711077, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 444230436, + "step": 4581 + }, + { + "epoch": 0.4478881501759875, + "loss": 0.09387665241956711, + "loss_ce": 0.00783996656537056, + "loss_iou": 0.291015625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 444230436, + "step": 4581 + }, + { + "epoch": 0.44798592100117324, + "grad_norm": 8.556064206627054, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 444327284, + "step": 4582 + }, + { + "epoch": 0.44798592100117324, + "loss": 0.07480466365814209, + "loss_ce": 0.0075744399800896645, + "loss_iou": 0.255859375, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 444327284, + "step": 4582 + }, + { + "epoch": 0.448083691826359, + "grad_norm": 9.940221663932396, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 444425252, + "step": 4583 + }, + { + "epoch": 0.448083691826359, + "loss": 0.08619730174541473, + "loss_ce": 0.008270669728517532, + "loss_iou": 0.42578125, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 444425252, + "step": 4583 + }, + { + "epoch": 0.4481814626515448, + "grad_norm": 3.8448882696889344, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 444522028, + "step": 4584 + }, + { + "epoch": 0.4481814626515448, + "loss": 0.06347550451755524, + "loss_ce": 0.006316076032817364, + "loss_iou": 0.224609375, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 444522028, + "step": 4584 + }, + { + "epoch": 0.44827923347673054, + "grad_norm": 4.033743601915481, + "learning_rate": 5e-05, + "loss": 0.0965, + "num_input_tokens_seen": 444618920, + "step": 4585 + }, + { + "epoch": 0.44827923347673054, + "loss": 0.08638983964920044, + "loss_ce": 0.0016120144864544272, + "loss_iou": 0.28125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 444618920, + "step": 4585 + }, + { + "epoch": 0.4483770043019163, + "grad_norm": 2.2696333000112214, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 444716920, + "step": 4586 + }, + { + "epoch": 0.4483770043019163, + "loss": 0.10054947435855865, + "loss_ce": 0.004831093829125166, + "loss_iou": 0.318359375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 444716920, + "step": 4586 + }, + { + "epoch": 0.4484747751271021, + "grad_norm": 8.61499714733323, + "learning_rate": 5e-05, + "loss": 0.0769, + "num_input_tokens_seen": 444813364, + "step": 4587 + }, + { + "epoch": 0.4484747751271021, + "loss": 0.09582556039094925, + "loss_ce": 0.0038989875465631485, + "loss_iou": 0.22265625, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 444813364, + "step": 4587 + }, + { + "epoch": 0.44857254595228785, + "grad_norm": 4.271291934389811, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 444910456, + "step": 4588 + }, + { + "epoch": 0.44857254595228785, + "loss": 0.03844686597585678, + "loss_ce": 0.007669891230762005, + "loss_iou": 0.283203125, + "loss_num": 0.00616455078125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 444910456, + "step": 4588 + }, + { + "epoch": 0.4486703167774736, + "grad_norm": 4.148470609457785, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 445007672, + "step": 4589 + }, + { + "epoch": 0.4486703167774736, + "loss": 0.08637858927249908, + "loss_ce": 0.006639971397817135, + "loss_iou": 0.234375, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 445007672, + "step": 4589 + }, + { + "epoch": 0.44876808760265935, + "grad_norm": 4.967613096644415, + "learning_rate": 5e-05, + "loss": 0.0463, + "num_input_tokens_seen": 445104308, + "step": 4590 + }, + { + "epoch": 0.44876808760265935, + "loss": 0.026346400380134583, + "loss_ce": 0.004877283703535795, + "loss_iou": 0.2490234375, + "loss_num": 0.004302978515625, + "loss_xval": 0.021484375, + "num_input_tokens_seen": 445104308, + "step": 4590 + }, + { + "epoch": 0.44886585842784515, + "grad_norm": 3.4288263482652104, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 445200520, + "step": 4591 + }, + { + "epoch": 0.44886585842784515, + "loss": 0.059797417372465134, + "loss_ce": 0.008695730939507484, + "loss_iou": 0.34765625, + "loss_num": 0.01025390625, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 445200520, + "step": 4591 + }, + { + "epoch": 0.4489636292530309, + "grad_norm": 15.548262332226614, + "learning_rate": 5e-05, + "loss": 0.1003, + "num_input_tokens_seen": 445297576, + "step": 4592 + }, + { + "epoch": 0.4489636292530309, + "loss": 0.1375398337841034, + "loss_ce": 0.009823762811720371, + "loss_iou": 0.306640625, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 445297576, + "step": 4592 + }, + { + "epoch": 0.44906140007821665, + "grad_norm": 11.373334538203684, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 445394672, + "step": 4593 + }, + { + "epoch": 0.44906140007821665, + "loss": 0.1121366024017334, + "loss_ce": 0.00656104227527976, + "loss_iou": 0.3203125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 445394672, + "step": 4593 + }, + { + "epoch": 0.4491591709034024, + "grad_norm": 11.950277830606339, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 445492260, + "step": 4594 + }, + { + "epoch": 0.4491591709034024, + "loss": 0.1038517951965332, + "loss_ce": 0.009079458191990852, + "loss_iou": 0.328125, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 445492260, + "step": 4594 + }, + { + "epoch": 0.4492569417285882, + "grad_norm": 17.9554831417117, + "learning_rate": 5e-05, + "loss": 0.0875, + "num_input_tokens_seen": 445589128, + "step": 4595 + }, + { + "epoch": 0.4492569417285882, + "loss": 0.08699050545692444, + "loss_ce": 0.009323270060122013, + "loss_iou": 0.302734375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 445589128, + "step": 4595 + }, + { + "epoch": 0.44935471255377396, + "grad_norm": 11.114005610684355, + "learning_rate": 5e-05, + "loss": 0.0913, + "num_input_tokens_seen": 445686368, + "step": 4596 + }, + { + "epoch": 0.44935471255377396, + "loss": 0.06295545399188995, + "loss_ce": 0.003743720706552267, + "loss_iou": 0.291015625, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 445686368, + "step": 4596 + }, + { + "epoch": 0.4494524833789597, + "grad_norm": 30.435117283441862, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 445783372, + "step": 4597 + }, + { + "epoch": 0.4494524833789597, + "loss": 0.07540804147720337, + "loss_ce": 0.003584921360015869, + "loss_iou": 0.296875, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 445783372, + "step": 4597 + }, + { + "epoch": 0.44955025420414546, + "grad_norm": 12.896411448087424, + "learning_rate": 5e-05, + "loss": 0.0938, + "num_input_tokens_seen": 445880960, + "step": 4598 + }, + { + "epoch": 0.44955025420414546, + "loss": 0.09938506782054901, + "loss_ce": 0.006230156868696213, + "loss_iou": 0.396484375, + "loss_num": 0.0185546875, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 445880960, + "step": 4598 + }, + { + "epoch": 0.44964802502933127, + "grad_norm": 3.369607444688358, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 445978264, + "step": 4599 + }, + { + "epoch": 0.44964802502933127, + "loss": 0.08306009322404861, + "loss_ce": 0.00592691358178854, + "loss_iou": 0.421875, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 445978264, + "step": 4599 + }, + { + "epoch": 0.449745795854517, + "grad_norm": 5.2142228981373355, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 446075048, + "step": 4600 + }, + { + "epoch": 0.449745795854517, + "loss": 0.07518455386161804, + "loss_ce": 0.005299301818013191, + "loss_iou": 0.38671875, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 446075048, + "step": 4600 + }, + { + "epoch": 0.44984356667970277, + "grad_norm": 4.73227558724224, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 446171528, + "step": 4601 + }, + { + "epoch": 0.44984356667970277, + "loss": 0.07754668593406677, + "loss_ce": 0.005174243822693825, + "loss_iou": 0.396484375, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 446171528, + "step": 4601 + }, + { + "epoch": 0.4499413375048885, + "grad_norm": 8.115750388324667, + "learning_rate": 5e-05, + "loss": 0.1024, + "num_input_tokens_seen": 446268968, + "step": 4602 + }, + { + "epoch": 0.4499413375048885, + "loss": 0.056085169315338135, + "loss_ce": 0.002023278037086129, + "loss_iou": 0.412109375, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 446268968, + "step": 4602 + }, + { + "epoch": 0.4500391083300743, + "grad_norm": 22.255101203081292, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 446366340, + "step": 4603 + }, + { + "epoch": 0.4500391083300743, + "loss": 0.06347024440765381, + "loss_ce": 0.004601833876222372, + "loss_iou": 0.333984375, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 446366340, + "step": 4603 + }, + { + "epoch": 0.4501368791552601, + "grad_norm": 8.052821518594081, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 446462932, + "step": 4604 + }, + { + "epoch": 0.4501368791552601, + "loss": 0.0738639235496521, + "loss_ce": 0.005100191105157137, + "loss_iou": 0.2265625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 446462932, + "step": 4604 + }, + { + "epoch": 0.4502346499804458, + "grad_norm": 4.125678933027025, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 446559828, + "step": 4605 + }, + { + "epoch": 0.4502346499804458, + "loss": 0.06218544766306877, + "loss_ce": 0.005087058991193771, + "loss_iou": 0.30078125, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 446559828, + "step": 4605 + }, + { + "epoch": 0.4503324208056316, + "grad_norm": 23.612860618676656, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 446656908, + "step": 4606 + }, + { + "epoch": 0.4503324208056316, + "loss": 0.06640647351741791, + "loss_ce": 0.006515721790492535, + "loss_iou": 0.294921875, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 446656908, + "step": 4606 + }, + { + "epoch": 0.4504301916308174, + "grad_norm": 24.88050520106074, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 446753500, + "step": 4607 + }, + { + "epoch": 0.4504301916308174, + "loss": 0.08082785457372665, + "loss_ce": 0.0053273639641702175, + "loss_iou": 0.306640625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 446753500, + "step": 4607 + }, + { + "epoch": 0.45052796245600313, + "grad_norm": 6.159520494108259, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 446850144, + "step": 4608 + }, + { + "epoch": 0.45052796245600313, + "loss": 0.10939420759677887, + "loss_ce": 0.00537504069507122, + "loss_iou": 0.251953125, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 446850144, + "step": 4608 + }, + { + "epoch": 0.4506257332811889, + "grad_norm": 3.0497082035038203, + "learning_rate": 5e-05, + "loss": 0.0709, + "num_input_tokens_seen": 446946832, + "step": 4609 + }, + { + "epoch": 0.4506257332811889, + "loss": 0.06219639629125595, + "loss_ce": 0.0133835319429636, + "loss_iou": 0.2158203125, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 446946832, + "step": 4609 + }, + { + "epoch": 0.4507235041063747, + "grad_norm": 5.057197331605775, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 447043580, + "step": 4610 + }, + { + "epoch": 0.4507235041063747, + "loss": 0.05803927034139633, + "loss_ce": 0.0032144407741725445, + "loss_iou": 0.25, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 447043580, + "step": 4610 + }, + { + "epoch": 0.45082127493156043, + "grad_norm": 4.909947269435214, + "learning_rate": 5e-05, + "loss": 0.1014, + "num_input_tokens_seen": 447140912, + "step": 4611 + }, + { + "epoch": 0.45082127493156043, + "loss": 0.09656692296266556, + "loss_ce": 0.012704621069133282, + "loss_iou": 0.267578125, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 447140912, + "step": 4611 + }, + { + "epoch": 0.4509190457567462, + "grad_norm": 6.040450004623714, + "learning_rate": 5e-05, + "loss": 0.0483, + "num_input_tokens_seen": 447238264, + "step": 4612 + }, + { + "epoch": 0.4509190457567462, + "loss": 0.05921810120344162, + "loss_ce": 0.003599814372137189, + "loss_iou": 0.357421875, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 447238264, + "step": 4612 + }, + { + "epoch": 0.45101681658193193, + "grad_norm": 2.5420379723575532, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 447335568, + "step": 4613 + }, + { + "epoch": 0.45101681658193193, + "loss": 0.08173191547393799, + "loss_ce": 0.003988390322774649, + "loss_iou": 0.333984375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 447335568, + "step": 4613 + }, + { + "epoch": 0.45111458740711774, + "grad_norm": 3.6650820922436558, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 447432292, + "step": 4614 + }, + { + "epoch": 0.45111458740711774, + "loss": 0.1029430627822876, + "loss_ce": 0.005042672157287598, + "loss_iou": 0.33984375, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 447432292, + "step": 4614 + }, + { + "epoch": 0.4512123582323035, + "grad_norm": 3.0111084550683054, + "learning_rate": 5e-05, + "loss": 0.0866, + "num_input_tokens_seen": 447529032, + "step": 4615 + }, + { + "epoch": 0.4512123582323035, + "loss": 0.09092555195093155, + "loss_ce": 0.0047133928164839745, + "loss_iou": 0.263671875, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 447529032, + "step": 4615 + }, + { + "epoch": 0.45131012905748924, + "grad_norm": 5.040410579571799, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 447625744, + "step": 4616 + }, + { + "epoch": 0.45131012905748924, + "loss": 0.08040028810501099, + "loss_ce": 0.00482350867241621, + "loss_iou": 0.29296875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 447625744, + "step": 4616 + }, + { + "epoch": 0.451407899882675, + "grad_norm": 20.14919817941924, + "learning_rate": 5e-05, + "loss": 0.0988, + "num_input_tokens_seen": 447722448, + "step": 4617 + }, + { + "epoch": 0.451407899882675, + "loss": 0.08971958607435226, + "loss_ce": 0.0044839889742434025, + "loss_iou": 0.361328125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 447722448, + "step": 4617 + }, + { + "epoch": 0.4515056707078608, + "grad_norm": 4.996840240651954, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 447819476, + "step": 4618 + }, + { + "epoch": 0.4515056707078608, + "loss": 0.04509429633617401, + "loss_ce": 0.005474850535392761, + "loss_iou": 0.296875, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 447819476, + "step": 4618 + }, + { + "epoch": 0.45160344153304655, + "grad_norm": 12.24002281460414, + "learning_rate": 5e-05, + "loss": 0.0848, + "num_input_tokens_seen": 447916852, + "step": 4619 + }, + { + "epoch": 0.45160344153304655, + "loss": 0.1118212565779686, + "loss_ce": 0.009984102100133896, + "loss_iou": 0.296875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 447916852, + "step": 4619 + }, + { + "epoch": 0.4517012123582323, + "grad_norm": 8.875091086632638, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 448013508, + "step": 4620 + }, + { + "epoch": 0.4517012123582323, + "loss": 0.08750054240226746, + "loss_ce": 0.004141772165894508, + "loss_iou": 0.1943359375, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 448013508, + "step": 4620 + }, + { + "epoch": 0.45179898318341805, + "grad_norm": 8.193100863936506, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 448110152, + "step": 4621 + }, + { + "epoch": 0.45179898318341805, + "loss": 0.07448473572731018, + "loss_ce": 0.005820184946060181, + "loss_iou": 0.298828125, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 448110152, + "step": 4621 + }, + { + "epoch": 0.45189675400860385, + "grad_norm": 2.993368521065341, + "learning_rate": 5e-05, + "loss": 0.1171, + "num_input_tokens_seen": 448206300, + "step": 4622 + }, + { + "epoch": 0.45189675400860385, + "loss": 0.15024131536483765, + "loss_ce": 0.007647926919162273, + "loss_iou": 0.302734375, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 448206300, + "step": 4622 + }, + { + "epoch": 0.4519945248337896, + "grad_norm": 5.7473169307227, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 448302968, + "step": 4623 + }, + { + "epoch": 0.4519945248337896, + "loss": 0.06967879831790924, + "loss_ce": 0.002234953921288252, + "loss_iou": 0.279296875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 448302968, + "step": 4623 + }, + { + "epoch": 0.45209229565897535, + "grad_norm": 14.196239985988084, + "learning_rate": 5e-05, + "loss": 0.1038, + "num_input_tokens_seen": 448400344, + "step": 4624 + }, + { + "epoch": 0.45209229565897535, + "loss": 0.10562998056411743, + "loss_ce": 0.0059748319908976555, + "loss_iou": 0.291015625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 448400344, + "step": 4624 + }, + { + "epoch": 0.4521900664841611, + "grad_norm": 8.88316652580342, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 448497364, + "step": 4625 + }, + { + "epoch": 0.4521900664841611, + "loss": 0.07689447700977325, + "loss_ce": 0.005544382147490978, + "loss_iou": 0.318359375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 448497364, + "step": 4625 + }, + { + "epoch": 0.4522878373093469, + "grad_norm": 5.095847462651073, + "learning_rate": 5e-05, + "loss": 0.0662, + "num_input_tokens_seen": 448594748, + "step": 4626 + }, + { + "epoch": 0.4522878373093469, + "loss": 0.0778038501739502, + "loss_ce": 0.003043418750166893, + "loss_iou": 0.40625, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 448594748, + "step": 4626 + }, + { + "epoch": 0.45238560813453266, + "grad_norm": 12.382267237499015, + "learning_rate": 5e-05, + "loss": 0.0928, + "num_input_tokens_seen": 448691768, + "step": 4627 + }, + { + "epoch": 0.45238560813453266, + "loss": 0.07466213405132294, + "loss_ce": 0.003235742449760437, + "loss_iou": 0.373046875, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 448691768, + "step": 4627 + }, + { + "epoch": 0.4524833789597184, + "grad_norm": 9.558260611090562, + "learning_rate": 5e-05, + "loss": 0.1006, + "num_input_tokens_seen": 448788456, + "step": 4628 + }, + { + "epoch": 0.4524833789597184, + "loss": 0.1432296633720398, + "loss_ce": 0.0091659314930439, + "loss_iou": 0.287109375, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 448788456, + "step": 4628 + }, + { + "epoch": 0.45258114978490416, + "grad_norm": 12.055844870205632, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 448885016, + "step": 4629 + }, + { + "epoch": 0.45258114978490416, + "loss": 0.07890186458826065, + "loss_ce": 0.0045152693055570126, + "loss_iou": 0.2333984375, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 448885016, + "step": 4629 + }, + { + "epoch": 0.45267892061008996, + "grad_norm": 11.08138408158687, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 448981300, + "step": 4630 + }, + { + "epoch": 0.45267892061008996, + "loss": 0.09479492157697678, + "loss_ce": 0.009589837864041328, + "loss_iou": 0.388671875, + "loss_num": 0.01708984375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 448981300, + "step": 4630 + }, + { + "epoch": 0.4527766914352757, + "grad_norm": 6.925996412139284, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 449078760, + "step": 4631 + }, + { + "epoch": 0.4527766914352757, + "loss": 0.09928999841213226, + "loss_ce": 0.0076304590329527855, + "loss_iou": 0.1416015625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 449078760, + "step": 4631 + }, + { + "epoch": 0.45287446226046146, + "grad_norm": 11.782496450587509, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 449175616, + "step": 4632 + }, + { + "epoch": 0.45287446226046146, + "loss": 0.08158979564905167, + "loss_ce": 0.009858227334916592, + "loss_iou": 0.322265625, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 449175616, + "step": 4632 + }, + { + "epoch": 0.45297223308564727, + "grad_norm": 5.309907240780491, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 449271876, + "step": 4633 + }, + { + "epoch": 0.45297223308564727, + "loss": 0.06248626857995987, + "loss_ce": 0.006105043925344944, + "loss_iou": 0.357421875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 449271876, + "step": 4633 + }, + { + "epoch": 0.453070003910833, + "grad_norm": 3.547301026085908, + "learning_rate": 5e-05, + "loss": 0.0494, + "num_input_tokens_seen": 449368968, + "step": 4634 + }, + { + "epoch": 0.453070003910833, + "loss": 0.05146142095327377, + "loss_ce": 0.003655634354799986, + "loss_iou": 0.369140625, + "loss_num": 0.009521484375, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 449368968, + "step": 4634 + }, + { + "epoch": 0.45316777473601877, + "grad_norm": 5.739085919896467, + "learning_rate": 5e-05, + "loss": 0.1082, + "num_input_tokens_seen": 449465752, + "step": 4635 + }, + { + "epoch": 0.45316777473601877, + "loss": 0.06870773434638977, + "loss_ce": 0.004132534842938185, + "loss_iou": 0.3984375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 449465752, + "step": 4635 + }, + { + "epoch": 0.4532655455612045, + "grad_norm": 10.925587457287353, + "learning_rate": 5e-05, + "loss": 0.0944, + "num_input_tokens_seen": 449561720, + "step": 4636 + }, + { + "epoch": 0.4532655455612045, + "loss": 0.10054896771907806, + "loss_ce": 0.01468394510447979, + "loss_iou": 0.236328125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 449561720, + "step": 4636 + }, + { + "epoch": 0.4533633163863903, + "grad_norm": 14.266983966586658, + "learning_rate": 5e-05, + "loss": 0.0534, + "num_input_tokens_seen": 449658520, + "step": 4637 + }, + { + "epoch": 0.4533633163863903, + "loss": 0.0532098188996315, + "loss_ce": 0.0017800686182454228, + "loss_iou": 0.27734375, + "loss_num": 0.01025390625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 449658520, + "step": 4637 + }, + { + "epoch": 0.4534610872115761, + "grad_norm": 10.06088701494103, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 449755940, + "step": 4638 + }, + { + "epoch": 0.4534610872115761, + "loss": 0.0774717777967453, + "loss_ce": 0.0111418217420578, + "loss_iou": 0.3203125, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 449755940, + "step": 4638 + }, + { + "epoch": 0.4535588580367618, + "grad_norm": 10.77405903111796, + "learning_rate": 5e-05, + "loss": 0.0881, + "num_input_tokens_seen": 449852424, + "step": 4639 + }, + { + "epoch": 0.4535588580367618, + "loss": 0.08509881794452667, + "loss_ce": 0.0045629339292645454, + "loss_iou": 0.296875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 449852424, + "step": 4639 + }, + { + "epoch": 0.4536566288619476, + "grad_norm": 11.407913509378801, + "learning_rate": 5e-05, + "loss": 0.0911, + "num_input_tokens_seen": 449949388, + "step": 4640 + }, + { + "epoch": 0.4536566288619476, + "loss": 0.10722072422504425, + "loss_ce": 0.0063067213632166386, + "loss_iou": 0.291015625, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 449949388, + "step": 4640 + }, + { + "epoch": 0.4537543996871334, + "grad_norm": 5.7615696514056784, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 450046060, + "step": 4641 + }, + { + "epoch": 0.4537543996871334, + "loss": 0.06506607681512833, + "loss_ce": 0.0019480963237583637, + "loss_iou": 0.255859375, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 450046060, + "step": 4641 + }, + { + "epoch": 0.45385217051231913, + "grad_norm": 4.6601693772387005, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 450142724, + "step": 4642 + }, + { + "epoch": 0.45385217051231913, + "loss": 0.06700902432203293, + "loss_ce": 0.0034103915095329285, + "loss_iou": 0.31640625, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 450142724, + "step": 4642 + }, + { + "epoch": 0.4539499413375049, + "grad_norm": 16.172810410447973, + "learning_rate": 5e-05, + "loss": 0.1196, + "num_input_tokens_seen": 450240716, + "step": 4643 + }, + { + "epoch": 0.4539499413375049, + "loss": 0.1366412490606308, + "loss_ce": 0.009779686108231544, + "loss_iou": 0.3125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 450240716, + "step": 4643 + }, + { + "epoch": 0.45404771216269063, + "grad_norm": 17.106085942597613, + "learning_rate": 5e-05, + "loss": 0.0956, + "num_input_tokens_seen": 450338352, + "step": 4644 + }, + { + "epoch": 0.45404771216269063, + "loss": 0.08210189640522003, + "loss_ce": 0.00437362864613533, + "loss_iou": 0.318359375, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 450338352, + "step": 4644 + }, + { + "epoch": 0.45414548298787644, + "grad_norm": 19.710338840947532, + "learning_rate": 5e-05, + "loss": 0.1008, + "num_input_tokens_seen": 450435484, + "step": 4645 + }, + { + "epoch": 0.45414548298787644, + "loss": 0.12381254136562347, + "loss_ce": 0.0075329365208745, + "loss_iou": 0.28515625, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 450435484, + "step": 4645 + }, + { + "epoch": 0.4542432538130622, + "grad_norm": 14.277435336401416, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 450531752, + "step": 4646 + }, + { + "epoch": 0.4542432538130622, + "loss": 0.08061277866363525, + "loss_ce": 0.003159167477861047, + "loss_iou": 0.216796875, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 450531752, + "step": 4646 + }, + { + "epoch": 0.45434102463824794, + "grad_norm": 6.572373647536577, + "learning_rate": 5e-05, + "loss": 0.0533, + "num_input_tokens_seen": 450628648, + "step": 4647 + }, + { + "epoch": 0.45434102463824794, + "loss": 0.054252054542303085, + "loss_ce": 0.00458469707518816, + "loss_iou": 0.359375, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 450628648, + "step": 4647 + }, + { + "epoch": 0.4544387954634337, + "grad_norm": 13.601565192257585, + "learning_rate": 5e-05, + "loss": 0.062, + "num_input_tokens_seen": 450726048, + "step": 4648 + }, + { + "epoch": 0.4544387954634337, + "loss": 0.06903766095638275, + "loss_ce": 0.007636293303221464, + "loss_iou": 0.244140625, + "loss_num": 0.01226806640625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 450726048, + "step": 4648 + }, + { + "epoch": 0.4545365662886195, + "grad_norm": 13.079358884052041, + "learning_rate": 5e-05, + "loss": 0.1162, + "num_input_tokens_seen": 450823300, + "step": 4649 + }, + { + "epoch": 0.4545365662886195, + "loss": 0.06758281588554382, + "loss_ce": 0.0046860892325639725, + "loss_iou": 0.35546875, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 450823300, + "step": 4649 + }, + { + "epoch": 0.45463433711380524, + "grad_norm": 9.954362941989114, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 450920224, + "step": 4650 + }, + { + "epoch": 0.45463433711380524, + "loss": 0.04314495250582695, + "loss_ce": 0.00209880992770195, + "loss_iou": 0.416015625, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 450920224, + "step": 4650 + }, + { + "epoch": 0.454732107938991, + "grad_norm": 8.35870788812161, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 451016616, + "step": 4651 + }, + { + "epoch": 0.454732107938991, + "loss": 0.06994254887104034, + "loss_ce": 0.0030632750131189823, + "loss_iou": 0.26171875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 451016616, + "step": 4651 + }, + { + "epoch": 0.45482987876417674, + "grad_norm": 9.354529944366684, + "learning_rate": 5e-05, + "loss": 0.0971, + "num_input_tokens_seen": 451113940, + "step": 4652 + }, + { + "epoch": 0.45482987876417674, + "loss": 0.10901346057653427, + "loss_ce": 0.011097810231149197, + "loss_iou": 0.29296875, + "loss_num": 0.01953125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 451113940, + "step": 4652 + }, + { + "epoch": 0.45492764958936255, + "grad_norm": 4.094374499961855, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 451210528, + "step": 4653 + }, + { + "epoch": 0.45492764958936255, + "loss": 0.0771862268447876, + "loss_ce": 0.0114361010491848, + "loss_iou": 0.2060546875, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 451210528, + "step": 4653 + }, + { + "epoch": 0.4550254204145483, + "grad_norm": 26.38389334324654, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 451307464, + "step": 4654 + }, + { + "epoch": 0.4550254204145483, + "loss": 0.045569419860839844, + "loss_ce": 0.0069665927439928055, + "loss_iou": 0.28125, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 451307464, + "step": 4654 + }, + { + "epoch": 0.45512319123973405, + "grad_norm": 27.97013557759095, + "learning_rate": 5e-05, + "loss": 0.1106, + "num_input_tokens_seen": 451405044, + "step": 4655 + }, + { + "epoch": 0.45512319123973405, + "loss": 0.09683524817228317, + "loss_ce": 0.0065184785053133965, + "loss_iou": 0.34375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 451405044, + "step": 4655 + }, + { + "epoch": 0.45522096206491985, + "grad_norm": 4.7418697926310545, + "learning_rate": 5e-05, + "loss": 0.0963, + "num_input_tokens_seen": 451502112, + "step": 4656 + }, + { + "epoch": 0.45522096206491985, + "loss": 0.11904893070459366, + "loss_ce": 0.005218366160988808, + "loss_iou": 0.306640625, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 451502112, + "step": 4656 + }, + { + "epoch": 0.4553187328901056, + "grad_norm": 7.977644923505213, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 451598284, + "step": 4657 + }, + { + "epoch": 0.4553187328901056, + "loss": 0.06978553533554077, + "loss_ce": 0.0105595039203763, + "loss_iou": 0.248046875, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 451598284, + "step": 4657 + }, + { + "epoch": 0.45541650371529135, + "grad_norm": 26.146346218750704, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 451695748, + "step": 4658 + }, + { + "epoch": 0.45541650371529135, + "loss": 0.10565114766359329, + "loss_ce": 0.0028183539398014545, + "loss_iou": 0.306640625, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 451695748, + "step": 4658 + }, + { + "epoch": 0.4555142745404771, + "grad_norm": 16.16987759984459, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 451792508, + "step": 4659 + }, + { + "epoch": 0.4555142745404771, + "loss": 0.08448624610900879, + "loss_ce": 0.005720383487641811, + "loss_iou": 0.390625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 451792508, + "step": 4659 + }, + { + "epoch": 0.4556120453656629, + "grad_norm": 8.464746987428981, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 451889224, + "step": 4660 + }, + { + "epoch": 0.4556120453656629, + "loss": 0.09218104928731918, + "loss_ce": 0.004656628705561161, + "loss_iou": 0.33203125, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 451889224, + "step": 4660 + }, + { + "epoch": 0.45570981619084866, + "grad_norm": 5.090642904346653, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 451986844, + "step": 4661 + }, + { + "epoch": 0.45570981619084866, + "loss": 0.06298014521598816, + "loss_ce": 0.002356982557103038, + "loss_iou": 0.416015625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 451986844, + "step": 4661 + }, + { + "epoch": 0.4558075870160344, + "grad_norm": 13.345742931407267, + "learning_rate": 5e-05, + "loss": 0.0942, + "num_input_tokens_seen": 452083888, + "step": 4662 + }, + { + "epoch": 0.4558075870160344, + "loss": 0.08480728417634964, + "loss_ce": 0.006438141223043203, + "loss_iou": 0.365234375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 452083888, + "step": 4662 + }, + { + "epoch": 0.45590535784122016, + "grad_norm": 25.805296570851446, + "learning_rate": 5e-05, + "loss": 0.0535, + "num_input_tokens_seen": 452180100, + "step": 4663 + }, + { + "epoch": 0.45590535784122016, + "loss": 0.05641968548297882, + "loss_ce": 0.00686295423656702, + "loss_iou": 0.3125, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 452180100, + "step": 4663 + }, + { + "epoch": 0.45600312866640597, + "grad_norm": 30.341102237926233, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 452277120, + "step": 4664 + }, + { + "epoch": 0.45600312866640597, + "loss": 0.06873232871294022, + "loss_ce": 0.005591457709670067, + "loss_iou": 0.296875, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 452277120, + "step": 4664 + }, + { + "epoch": 0.4561008994915917, + "grad_norm": 2.9205937908512496, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 452374264, + "step": 4665 + }, + { + "epoch": 0.4561008994915917, + "loss": 0.08156854659318924, + "loss_ce": 0.006800479255616665, + "loss_iou": 0.33203125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 452374264, + "step": 4665 + }, + { + "epoch": 0.45619867031677747, + "grad_norm": 6.704601327186112, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 452470936, + "step": 4666 + }, + { + "epoch": 0.45619867031677747, + "loss": 0.062351346015930176, + "loss_ce": 0.003605008590966463, + "loss_iou": 0.345703125, + "loss_num": 0.01171875, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 452470936, + "step": 4666 + }, + { + "epoch": 0.4562964411419632, + "grad_norm": 2.9322705595965672, + "learning_rate": 5e-05, + "loss": 0.0848, + "num_input_tokens_seen": 452567620, + "step": 4667 + }, + { + "epoch": 0.4562964411419632, + "loss": 0.08018847554922104, + "loss_ce": 0.0069234007969498634, + "loss_iou": 0.2734375, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 452567620, + "step": 4667 + }, + { + "epoch": 0.456394211967149, + "grad_norm": 12.576112808985428, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 452665752, + "step": 4668 + }, + { + "epoch": 0.456394211967149, + "loss": 0.07515744864940643, + "loss_ce": 0.003532696282491088, + "loss_iou": 0.306640625, + "loss_num": 0.0142822265625, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 452665752, + "step": 4668 + }, + { + "epoch": 0.45649198279233477, + "grad_norm": 9.936019489438818, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 452762268, + "step": 4669 + }, + { + "epoch": 0.45649198279233477, + "loss": 0.05587119236588478, + "loss_ce": 0.0047847675159573555, + "loss_iou": 0.267578125, + "loss_num": 0.01025390625, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 452762268, + "step": 4669 + }, + { + "epoch": 0.4565897536175205, + "grad_norm": 32.65194348664925, + "learning_rate": 5e-05, + "loss": 0.1035, + "num_input_tokens_seen": 452858984, + "step": 4670 + }, + { + "epoch": 0.4565897536175205, + "loss": 0.07203127443790436, + "loss_ce": 0.0038855262100696564, + "loss_iou": 0.31640625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 452858984, + "step": 4670 + }, + { + "epoch": 0.45668752444270627, + "grad_norm": 13.463361962630184, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 452957288, + "step": 4671 + }, + { + "epoch": 0.45668752444270627, + "loss": 0.095908522605896, + "loss_ce": 0.00398958008736372, + "loss_iou": 0.4296875, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 452957288, + "step": 4671 + }, + { + "epoch": 0.4567852952678921, + "grad_norm": 22.424591514373137, + "learning_rate": 5e-05, + "loss": 0.0691, + "num_input_tokens_seen": 453054156, + "step": 4672 + }, + { + "epoch": 0.4567852952678921, + "loss": 0.06828199326992035, + "loss_ce": 0.008192882873117924, + "loss_iou": 0.400390625, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 453054156, + "step": 4672 + }, + { + "epoch": 0.4568830660930778, + "grad_norm": 2.6683719733768596, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 453150980, + "step": 4673 + }, + { + "epoch": 0.4568830660930778, + "loss": 0.08755692839622498, + "loss_ce": 0.005266285035759211, + "loss_iou": 0.333984375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 453150980, + "step": 4673 + }, + { + "epoch": 0.4569808369182636, + "grad_norm": 45.90550976901264, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 453248548, + "step": 4674 + }, + { + "epoch": 0.4569808369182636, + "loss": 0.08318331837654114, + "loss_ce": 0.003883383236825466, + "loss_iou": 0.31640625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 453248548, + "step": 4674 + }, + { + "epoch": 0.4570786077434493, + "grad_norm": 9.830716197687858, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 453344956, + "step": 4675 + }, + { + "epoch": 0.4570786077434493, + "loss": 0.06314734369516373, + "loss_ce": 0.003561773570254445, + "loss_iou": 0.439453125, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 453344956, + "step": 4675 + }, + { + "epoch": 0.45717637856863513, + "grad_norm": 5.749434129567256, + "learning_rate": 5e-05, + "loss": 0.1184, + "num_input_tokens_seen": 453442048, + "step": 4676 + }, + { + "epoch": 0.45717637856863513, + "loss": 0.10204100608825684, + "loss_ce": 0.001989122247323394, + "loss_iou": 0.37890625, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 453442048, + "step": 4676 + }, + { + "epoch": 0.4572741493938209, + "grad_norm": 5.878013359235051, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 453539412, + "step": 4677 + }, + { + "epoch": 0.4572741493938209, + "loss": 0.07961555570363998, + "loss_ce": 0.00623603630810976, + "loss_iou": 0.3203125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 453539412, + "step": 4677 + }, + { + "epoch": 0.45737192021900663, + "grad_norm": 7.924419607746768, + "learning_rate": 5e-05, + "loss": 0.0468, + "num_input_tokens_seen": 453636152, + "step": 4678 + }, + { + "epoch": 0.45737192021900663, + "loss": 0.050560202449560165, + "loss_ce": 0.005180564243346453, + "loss_iou": 0.2890625, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 453636152, + "step": 4678 + }, + { + "epoch": 0.45746969104419244, + "grad_norm": 12.116320174038918, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 453733656, + "step": 4679 + }, + { + "epoch": 0.45746969104419244, + "loss": 0.07289145886898041, + "loss_ce": 0.005218727979809046, + "loss_iou": 0.29296875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 453733656, + "step": 4679 + }, + { + "epoch": 0.4575674618693782, + "grad_norm": 8.952248031537701, + "learning_rate": 5e-05, + "loss": 0.0992, + "num_input_tokens_seen": 453831084, + "step": 4680 + }, + { + "epoch": 0.4575674618693782, + "loss": 0.10457150638103485, + "loss_ce": 0.01021115854382515, + "loss_iou": 0.328125, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 453831084, + "step": 4680 + }, + { + "epoch": 0.45766523269456394, + "grad_norm": 8.424231441854367, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 453928600, + "step": 4681 + }, + { + "epoch": 0.45766523269456394, + "loss": 0.061875686049461365, + "loss_ce": 0.005112990736961365, + "loss_iou": 0.37890625, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 453928600, + "step": 4681 + }, + { + "epoch": 0.4577630035197497, + "grad_norm": 11.606847690871511, + "learning_rate": 5e-05, + "loss": 0.0877, + "num_input_tokens_seen": 454025672, + "step": 4682 + }, + { + "epoch": 0.4577630035197497, + "loss": 0.09943032264709473, + "loss_ce": 0.0023691661190241575, + "loss_iou": 0.294921875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 454025672, + "step": 4682 + }, + { + "epoch": 0.4578607743449355, + "grad_norm": 16.32796406341307, + "learning_rate": 5e-05, + "loss": 0.1009, + "num_input_tokens_seen": 454123148, + "step": 4683 + }, + { + "epoch": 0.4578607743449355, + "loss": 0.065310999751091, + "loss_ce": 0.003520533675327897, + "loss_iou": 0.2890625, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 454123148, + "step": 4683 + }, + { + "epoch": 0.45795854517012125, + "grad_norm": 40.55283531707467, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 454219476, + "step": 4684 + }, + { + "epoch": 0.45795854517012125, + "loss": 0.08293463289737701, + "loss_ce": 0.0018341678660362959, + "loss_iou": 0.32421875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 454219476, + "step": 4684 + }, + { + "epoch": 0.458056315995307, + "grad_norm": 10.718854051567025, + "learning_rate": 5e-05, + "loss": 0.0952, + "num_input_tokens_seen": 454316244, + "step": 4685 + }, + { + "epoch": 0.458056315995307, + "loss": 0.11687814444303513, + "loss_ce": 0.005717863328754902, + "loss_iou": 0.28125, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 454316244, + "step": 4685 + }, + { + "epoch": 0.45815408682049275, + "grad_norm": 8.881694235737099, + "learning_rate": 5e-05, + "loss": 0.0632, + "num_input_tokens_seen": 454413336, + "step": 4686 + }, + { + "epoch": 0.45815408682049275, + "loss": 0.06783729791641235, + "loss_ce": 0.005543296225368977, + "loss_iou": 0.298828125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 454413336, + "step": 4686 + }, + { + "epoch": 0.45825185764567855, + "grad_norm": 11.53400442306075, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 454509644, + "step": 4687 + }, + { + "epoch": 0.45825185764567855, + "loss": 0.0702289417386055, + "loss_ce": 0.003879911731928587, + "loss_iou": 0.20703125, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 454509644, + "step": 4687 + }, + { + "epoch": 0.4583496284708643, + "grad_norm": 10.526786420452906, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 454606784, + "step": 4688 + }, + { + "epoch": 0.4583496284708643, + "loss": 0.07130187749862671, + "loss_ce": 0.005994261242449284, + "loss_iou": 0.2890625, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 454606784, + "step": 4688 + }, + { + "epoch": 0.45844739929605005, + "grad_norm": 3.967912309464074, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 454704312, + "step": 4689 + }, + { + "epoch": 0.45844739929605005, + "loss": 0.07317540049552917, + "loss_ce": 0.00606724014505744, + "loss_iou": 0.3984375, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 454704312, + "step": 4689 + }, + { + "epoch": 0.4585451701212358, + "grad_norm": 9.667173771825153, + "learning_rate": 5e-05, + "loss": 0.071, + "num_input_tokens_seen": 454801444, + "step": 4690 + }, + { + "epoch": 0.4585451701212358, + "loss": 0.07384715229272842, + "loss_ce": 0.007990216836333275, + "loss_iou": 0.30078125, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 454801444, + "step": 4690 + }, + { + "epoch": 0.4586429409464216, + "grad_norm": 5.28309604761165, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 454898020, + "step": 4691 + }, + { + "epoch": 0.4586429409464216, + "loss": 0.06872887164354324, + "loss_ce": 0.0036806543357670307, + "loss_iou": 0.283203125, + "loss_num": 0.0130615234375, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 454898020, + "step": 4691 + }, + { + "epoch": 0.45874071177160736, + "grad_norm": 5.553507439480368, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 454994820, + "step": 4692 + }, + { + "epoch": 0.45874071177160736, + "loss": 0.08186692744493484, + "loss_ce": 0.0061070360243320465, + "loss_iou": 0.25390625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 454994820, + "step": 4692 + }, + { + "epoch": 0.4588384825967931, + "grad_norm": 2.9705600364415643, + "learning_rate": 5e-05, + "loss": 0.0581, + "num_input_tokens_seen": 455092244, + "step": 4693 + }, + { + "epoch": 0.4588384825967931, + "loss": 0.05066358298063278, + "loss_ce": 0.006687751971185207, + "loss_iou": 0.314453125, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 455092244, + "step": 4693 + }, + { + "epoch": 0.45893625342197886, + "grad_norm": 4.976541411444607, + "learning_rate": 5e-05, + "loss": 0.1204, + "num_input_tokens_seen": 455189664, + "step": 4694 + }, + { + "epoch": 0.45893625342197886, + "loss": 0.13191355764865875, + "loss_ce": 0.008023634552955627, + "loss_iou": 0.26171875, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 455189664, + "step": 4694 + }, + { + "epoch": 0.45903402424716466, + "grad_norm": 10.400422942652474, + "learning_rate": 5e-05, + "loss": 0.0526, + "num_input_tokens_seen": 455286792, + "step": 4695 + }, + { + "epoch": 0.45903402424716466, + "loss": 0.06050843000411987, + "loss_ce": 0.00232666521333158, + "loss_iou": 0.298828125, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 455286792, + "step": 4695 + }, + { + "epoch": 0.4591317950723504, + "grad_norm": 21.812039689874076, + "learning_rate": 5e-05, + "loss": 0.1005, + "num_input_tokens_seen": 455383104, + "step": 4696 + }, + { + "epoch": 0.4591317950723504, + "loss": 0.13498520851135254, + "loss_ce": 0.0021803462877869606, + "loss_iou": 0.294921875, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 455383104, + "step": 4696 + }, + { + "epoch": 0.45922956589753616, + "grad_norm": 9.858581678344764, + "learning_rate": 5e-05, + "loss": 0.1094, + "num_input_tokens_seen": 455480244, + "step": 4697 + }, + { + "epoch": 0.45922956589753616, + "loss": 0.14397267997264862, + "loss_ce": 0.004965114872902632, + "loss_iou": 0.25390625, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 455480244, + "step": 4697 + }, + { + "epoch": 0.4593273367227219, + "grad_norm": 5.446759558994222, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 455577792, + "step": 4698 + }, + { + "epoch": 0.4593273367227219, + "loss": 0.08443509787321091, + "loss_ce": 0.007851233705878258, + "loss_iou": 0.296875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 455577792, + "step": 4698 + }, + { + "epoch": 0.4594251075479077, + "grad_norm": 10.193266923017985, + "learning_rate": 5e-05, + "loss": 0.1004, + "num_input_tokens_seen": 455675104, + "step": 4699 + }, + { + "epoch": 0.4594251075479077, + "loss": 0.08383215963840485, + "loss_ce": 0.005157841369509697, + "loss_iou": 0.43359375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 455675104, + "step": 4699 + }, + { + "epoch": 0.45952287837309347, + "grad_norm": 12.596844900040827, + "learning_rate": 5e-05, + "loss": 0.0595, + "num_input_tokens_seen": 455772636, + "step": 4700 + }, + { + "epoch": 0.45952287837309347, + "loss": 0.07267876714468002, + "loss_ce": 0.0020000580698251724, + "loss_iou": 0.328125, + "loss_num": 0.01409912109375, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 455772636, + "step": 4700 + }, + { + "epoch": 0.4596206491982792, + "grad_norm": 11.345261781877204, + "learning_rate": 5e-05, + "loss": 0.1108, + "num_input_tokens_seen": 455869952, + "step": 4701 + }, + { + "epoch": 0.4596206491982792, + "loss": 0.12131273746490479, + "loss_ce": 0.0029045408591628075, + "loss_iou": 0.453125, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 455869952, + "step": 4701 + }, + { + "epoch": 0.459718420023465, + "grad_norm": 17.00626609827382, + "learning_rate": 5e-05, + "loss": 0.0921, + "num_input_tokens_seen": 455967012, + "step": 4702 + }, + { + "epoch": 0.459718420023465, + "loss": 0.07408082485198975, + "loss_ce": 0.003966692369431257, + "loss_iou": 0.26953125, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 455967012, + "step": 4702 + }, + { + "epoch": 0.4598161908486508, + "grad_norm": 6.522858476960234, + "learning_rate": 5e-05, + "loss": 0.0709, + "num_input_tokens_seen": 456063944, + "step": 4703 + }, + { + "epoch": 0.4598161908486508, + "loss": 0.05926066264510155, + "loss_ce": 0.004916489589959383, + "loss_iou": 0.287109375, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 456063944, + "step": 4703 + }, + { + "epoch": 0.4599139616738365, + "grad_norm": 6.588056969764839, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 456161604, + "step": 4704 + }, + { + "epoch": 0.4599139616738365, + "loss": 0.10603396594524384, + "loss_ce": 0.0033423176500946283, + "loss_iou": 0.25390625, + "loss_num": 0.0206298828125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 456161604, + "step": 4704 + }, + { + "epoch": 0.4600117324990223, + "grad_norm": 25.4880341911035, + "learning_rate": 5e-05, + "loss": 0.0971, + "num_input_tokens_seen": 456258944, + "step": 4705 + }, + { + "epoch": 0.4600117324990223, + "loss": 0.08658137917518616, + "loss_ce": 0.006877095438539982, + "loss_iou": 0.326171875, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 456258944, + "step": 4705 + }, + { + "epoch": 0.4601095033242081, + "grad_norm": 6.689255153490444, + "learning_rate": 5e-05, + "loss": 0.1136, + "num_input_tokens_seen": 456355544, + "step": 4706 + }, + { + "epoch": 0.4601095033242081, + "loss": 0.13738611340522766, + "loss_ce": 0.006236818619072437, + "loss_iou": 0.1923828125, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 456355544, + "step": 4706 + }, + { + "epoch": 0.46020727414939383, + "grad_norm": 7.723821081610091, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 456452888, + "step": 4707 + }, + { + "epoch": 0.46020727414939383, + "loss": 0.06324860453605652, + "loss_ce": 0.005463575944304466, + "loss_iou": 0.216796875, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 456452888, + "step": 4707 + }, + { + "epoch": 0.4603050449745796, + "grad_norm": 3.6359896369069733, + "learning_rate": 5e-05, + "loss": 0.0565, + "num_input_tokens_seen": 456549484, + "step": 4708 + }, + { + "epoch": 0.4603050449745796, + "loss": 0.07455814629793167, + "loss_ce": 0.006473424378782511, + "loss_iou": 0.271484375, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 456549484, + "step": 4708 + }, + { + "epoch": 0.46040281579976533, + "grad_norm": 6.24590420257636, + "learning_rate": 5e-05, + "loss": 0.0758, + "num_input_tokens_seen": 456646884, + "step": 4709 + }, + { + "epoch": 0.46040281579976533, + "loss": 0.09592114388942719, + "loss_ce": 0.006138435564935207, + "loss_iou": 0.3515625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 456646884, + "step": 4709 + }, + { + "epoch": 0.46050058662495114, + "grad_norm": 6.5129394921431745, + "learning_rate": 5e-05, + "loss": 0.0917, + "num_input_tokens_seen": 456743664, + "step": 4710 + }, + { + "epoch": 0.46050058662495114, + "loss": 0.07808604836463928, + "loss_ce": 0.004798082634806633, + "loss_iou": 0.3046875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 456743664, + "step": 4710 + }, + { + "epoch": 0.4605983574501369, + "grad_norm": 12.475188046527723, + "learning_rate": 5e-05, + "loss": 0.0626, + "num_input_tokens_seen": 456841576, + "step": 4711 + }, + { + "epoch": 0.4605983574501369, + "loss": 0.08019755780696869, + "loss_ce": 0.005627856124192476, + "loss_iou": 0.291015625, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 456841576, + "step": 4711 + }, + { + "epoch": 0.46069612827532264, + "grad_norm": 12.823885881575011, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 456938516, + "step": 4712 + }, + { + "epoch": 0.46069612827532264, + "loss": 0.10534512996673584, + "loss_ce": 0.0058578248135745525, + "loss_iou": 0.34375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 456938516, + "step": 4712 + }, + { + "epoch": 0.4607938991005084, + "grad_norm": 4.816288670705685, + "learning_rate": 5e-05, + "loss": 0.073, + "num_input_tokens_seen": 457034856, + "step": 4713 + }, + { + "epoch": 0.4607938991005084, + "loss": 0.10463978350162506, + "loss_ce": 0.006739386823028326, + "loss_iou": 0.287109375, + "loss_num": 0.0196533203125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 457034856, + "step": 4713 + }, + { + "epoch": 0.4608916699256942, + "grad_norm": 11.058182888845323, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 457130884, + "step": 4714 + }, + { + "epoch": 0.4608916699256942, + "loss": 0.0942917913198471, + "loss_ce": 0.00722513860091567, + "loss_iou": 0.298828125, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 457130884, + "step": 4714 + }, + { + "epoch": 0.46098944075087994, + "grad_norm": 8.563884099869169, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 457228668, + "step": 4715 + }, + { + "epoch": 0.46098944075087994, + "loss": 0.07372845709323883, + "loss_ce": 0.007779967505484819, + "loss_iou": 0.28515625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 457228668, + "step": 4715 + }, + { + "epoch": 0.4610872115760657, + "grad_norm": 3.1041273286050215, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 457325780, + "step": 4716 + }, + { + "epoch": 0.4610872115760657, + "loss": 0.06316327303647995, + "loss_ce": 0.002723214915022254, + "loss_iou": 0.2734375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 457325780, + "step": 4716 + }, + { + "epoch": 0.46118498240125144, + "grad_norm": 3.8767966616762077, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 457422596, + "step": 4717 + }, + { + "epoch": 0.46118498240125144, + "loss": 0.08058644831180573, + "loss_ce": 0.006169332191348076, + "loss_iou": 0.291015625, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 457422596, + "step": 4717 + }, + { + "epoch": 0.46128275322643725, + "grad_norm": 7.153740318471693, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 457519832, + "step": 4718 + }, + { + "epoch": 0.46128275322643725, + "loss": 0.0591207891702652, + "loss_ce": 0.004707949701696634, + "loss_iou": 0.38671875, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 457519832, + "step": 4718 + }, + { + "epoch": 0.461380524051623, + "grad_norm": 5.321264799654169, + "learning_rate": 5e-05, + "loss": 0.0658, + "num_input_tokens_seen": 457616848, + "step": 4719 + }, + { + "epoch": 0.461380524051623, + "loss": 0.09013654291629791, + "loss_ce": 0.004107493907213211, + "loss_iou": 0.28125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 457616848, + "step": 4719 + }, + { + "epoch": 0.46147829487680875, + "grad_norm": 5.194512973864422, + "learning_rate": 5e-05, + "loss": 0.0946, + "num_input_tokens_seen": 457714224, + "step": 4720 + }, + { + "epoch": 0.46147829487680875, + "loss": 0.08537354320287704, + "loss_ce": 0.0031439291778951883, + "loss_iou": 0.30859375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 457714224, + "step": 4720 + }, + { + "epoch": 0.4615760657019945, + "grad_norm": 17.145350213869516, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 457811236, + "step": 4721 + }, + { + "epoch": 0.4615760657019945, + "loss": 0.05790793523192406, + "loss_ce": 0.005265113897621632, + "loss_iou": 0.3515625, + "loss_num": 0.010498046875, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 457811236, + "step": 4721 + }, + { + "epoch": 0.4616738365271803, + "grad_norm": 3.144640454373571, + "learning_rate": 5e-05, + "loss": 0.0613, + "num_input_tokens_seen": 457907296, + "step": 4722 + }, + { + "epoch": 0.4616738365271803, + "loss": 0.09569156169891357, + "loss_ce": 0.014057036489248276, + "loss_iou": 0.28125, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 457907296, + "step": 4722 + }, + { + "epoch": 0.46177160735236605, + "grad_norm": 15.280979910657376, + "learning_rate": 5e-05, + "loss": 0.1018, + "num_input_tokens_seen": 458004744, + "step": 4723 + }, + { + "epoch": 0.46177160735236605, + "loss": 0.08539743721485138, + "loss_ce": 0.0043198587372899055, + "loss_iou": 0.32421875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 458004744, + "step": 4723 + }, + { + "epoch": 0.4618693781775518, + "grad_norm": 6.732716514101877, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 458101944, + "step": 4724 + }, + { + "epoch": 0.4618693781775518, + "loss": 0.08437924087047577, + "loss_ce": 0.0046062953770160675, + "loss_iou": 0.2890625, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 458101944, + "step": 4724 + }, + { + "epoch": 0.4619671490027376, + "grad_norm": 6.2742209656144805, + "learning_rate": 5e-05, + "loss": 0.0503, + "num_input_tokens_seen": 458197992, + "step": 4725 + }, + { + "epoch": 0.4619671490027376, + "loss": 0.04441455751657486, + "loss_ce": 0.004627264104783535, + "loss_iou": 0.263671875, + "loss_num": 0.00799560546875, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 458197992, + "step": 4725 + }, + { + "epoch": 0.46206491982792336, + "grad_norm": 2.105931529552836, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 458295396, + "step": 4726 + }, + { + "epoch": 0.46206491982792336, + "loss": 0.06190190091729164, + "loss_ce": 0.005154463928192854, + "loss_iou": 0.380859375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 458295396, + "step": 4726 + }, + { + "epoch": 0.4621626906531091, + "grad_norm": 8.05626373731435, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 458392264, + "step": 4727 + }, + { + "epoch": 0.4621626906531091, + "loss": 0.08861149847507477, + "loss_ce": 0.005374799948185682, + "loss_iou": 0.26953125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 458392264, + "step": 4727 + }, + { + "epoch": 0.46226046147829486, + "grad_norm": 12.330355403266683, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 458489968, + "step": 4728 + }, + { + "epoch": 0.46226046147829486, + "loss": 0.07437547296285629, + "loss_ce": 0.0013011338887736201, + "loss_iou": 0.244140625, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 458489968, + "step": 4728 + }, + { + "epoch": 0.46235823230348067, + "grad_norm": 9.09849007346706, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 458587304, + "step": 4729 + }, + { + "epoch": 0.46235823230348067, + "loss": 0.08979657292366028, + "loss_ce": 0.00740483682602644, + "loss_iou": 0.35546875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 458587304, + "step": 4729 + }, + { + "epoch": 0.4624560031286664, + "grad_norm": 6.6561602774619075, + "learning_rate": 5e-05, + "loss": 0.0863, + "num_input_tokens_seen": 458684080, + "step": 4730 + }, + { + "epoch": 0.4624560031286664, + "loss": 0.09583799540996552, + "loss_ce": 0.004895621910691261, + "loss_iou": 0.27734375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 458684080, + "step": 4730 + }, + { + "epoch": 0.46255377395385217, + "grad_norm": 9.409949216533482, + "learning_rate": 5e-05, + "loss": 0.1135, + "num_input_tokens_seen": 458780480, + "step": 4731 + }, + { + "epoch": 0.46255377395385217, + "loss": 0.10272076725959778, + "loss_ce": 0.005049264058470726, + "loss_iou": 0.384765625, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 458780480, + "step": 4731 + }, + { + "epoch": 0.4626515447790379, + "grad_norm": 19.864223790631605, + "learning_rate": 5e-05, + "loss": 0.0773, + "num_input_tokens_seen": 458878520, + "step": 4732 + }, + { + "epoch": 0.4626515447790379, + "loss": 0.10123759508132935, + "loss_ce": 0.0023911623284220695, + "loss_iou": 0.4765625, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 458878520, + "step": 4732 + }, + { + "epoch": 0.4627493156042237, + "grad_norm": 10.65487517738381, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 458975328, + "step": 4733 + }, + { + "epoch": 0.4627493156042237, + "loss": 0.07098166644573212, + "loss_ce": 0.007033987902104855, + "loss_iou": 0.29296875, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 458975328, + "step": 4733 + }, + { + "epoch": 0.46284708642940947, + "grad_norm": 6.287801134069191, + "learning_rate": 5e-05, + "loss": 0.0911, + "num_input_tokens_seen": 459073236, + "step": 4734 + }, + { + "epoch": 0.46284708642940947, + "loss": 0.0948081910610199, + "loss_ce": 0.004079432692378759, + "loss_iou": 0.373046875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 459073236, + "step": 4734 + }, + { + "epoch": 0.4629448572545952, + "grad_norm": 17.106315554127164, + "learning_rate": 5e-05, + "loss": 0.0969, + "num_input_tokens_seen": 459169756, + "step": 4735 + }, + { + "epoch": 0.4629448572545952, + "loss": 0.09805980324745178, + "loss_ce": 0.006079813931137323, + "loss_iou": 0.314453125, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 459169756, + "step": 4735 + }, + { + "epoch": 0.46304262807978097, + "grad_norm": 37.501919578801136, + "learning_rate": 5e-05, + "loss": 0.1032, + "num_input_tokens_seen": 459266704, + "step": 4736 + }, + { + "epoch": 0.46304262807978097, + "loss": 0.14019915461540222, + "loss_ce": 0.003663518000394106, + "loss_iou": 0.375, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 459266704, + "step": 4736 + }, + { + "epoch": 0.4631403989049668, + "grad_norm": 17.87695554848911, + "learning_rate": 5e-05, + "loss": 0.088, + "num_input_tokens_seen": 459363556, + "step": 4737 + }, + { + "epoch": 0.4631403989049668, + "loss": 0.1027061939239502, + "loss_ce": 0.005629773251712322, + "loss_iou": 0.36328125, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 459363556, + "step": 4737 + }, + { + "epoch": 0.4632381697301525, + "grad_norm": 7.827456561015684, + "learning_rate": 5e-05, + "loss": 0.1248, + "num_input_tokens_seen": 459461008, + "step": 4738 + }, + { + "epoch": 0.4632381697301525, + "loss": 0.1224212646484375, + "loss_ce": 0.009689327329397202, + "loss_iou": 0.427734375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 459461008, + "step": 4738 + }, + { + "epoch": 0.4633359405553383, + "grad_norm": 4.366992059942849, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 459557576, + "step": 4739 + }, + { + "epoch": 0.4633359405553383, + "loss": 0.04452303424477577, + "loss_ce": 0.005513940006494522, + "loss_iou": 0.259765625, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 459557576, + "step": 4739 + }, + { + "epoch": 0.463433711380524, + "grad_norm": 2.2566561881337206, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 459654604, + "step": 4740 + }, + { + "epoch": 0.463433711380524, + "loss": 0.07306230813264847, + "loss_ce": 0.004306205082684755, + "loss_iou": 0.25390625, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 459654604, + "step": 4740 + }, + { + "epoch": 0.46353148220570983, + "grad_norm": 4.860031679521447, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 459752560, + "step": 4741 + }, + { + "epoch": 0.46353148220570983, + "loss": 0.08192192763090134, + "loss_ce": 0.004407282453030348, + "loss_iou": 0.271484375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 459752560, + "step": 4741 + }, + { + "epoch": 0.4636292530308956, + "grad_norm": 19.340336616397856, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 459849424, + "step": 4742 + }, + { + "epoch": 0.4636292530308956, + "loss": 0.06800052523612976, + "loss_ce": 0.00532123539596796, + "loss_iou": 0.1708984375, + "loss_num": 0.0125732421875, + "loss_xval": 0.0625, + "num_input_tokens_seen": 459849424, + "step": 4742 + }, + { + "epoch": 0.46372702385608133, + "grad_norm": 5.302088068539694, + "learning_rate": 5e-05, + "loss": 0.0631, + "num_input_tokens_seen": 459945376, + "step": 4743 + }, + { + "epoch": 0.46372702385608133, + "loss": 0.05210505425930023, + "loss_ce": 0.0037652160972356796, + "loss_iou": 0.2197265625, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 459945376, + "step": 4743 + }, + { + "epoch": 0.4638247946812671, + "grad_norm": 2.5228117893356665, + "learning_rate": 5e-05, + "loss": 0.067, + "num_input_tokens_seen": 460041660, + "step": 4744 + }, + { + "epoch": 0.4638247946812671, + "loss": 0.05280156806111336, + "loss_ce": 0.0035843425430357456, + "loss_iou": 0.365234375, + "loss_num": 0.00982666015625, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 460041660, + "step": 4744 + }, + { + "epoch": 0.4639225655064529, + "grad_norm": 9.362550902439397, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 460139296, + "step": 4745 + }, + { + "epoch": 0.4639225655064529, + "loss": 0.08604881912469864, + "loss_ce": 0.002461170544847846, + "loss_iou": 0.408203125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 460139296, + "step": 4745 + }, + { + "epoch": 0.46402033633163864, + "grad_norm": 5.77079578808716, + "learning_rate": 5e-05, + "loss": 0.111, + "num_input_tokens_seen": 460236512, + "step": 4746 + }, + { + "epoch": 0.46402033633163864, + "loss": 0.13123784959316254, + "loss_ce": 0.006878727115690708, + "loss_iou": 0.236328125, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 460236512, + "step": 4746 + }, + { + "epoch": 0.4641181071568244, + "grad_norm": 8.139579512825803, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 460333872, + "step": 4747 + }, + { + "epoch": 0.4641181071568244, + "loss": 0.0878753587603569, + "loss_ce": 0.003921504132449627, + "loss_iou": 0.318359375, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 460333872, + "step": 4747 + }, + { + "epoch": 0.4642158779820102, + "grad_norm": 19.92624213391723, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 460431012, + "step": 4748 + }, + { + "epoch": 0.4642158779820102, + "loss": 0.08463169634342194, + "loss_ce": 0.005011334549635649, + "loss_iou": 0.333984375, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 460431012, + "step": 4748 + }, + { + "epoch": 0.46431364880719594, + "grad_norm": 6.668467183271042, + "learning_rate": 5e-05, + "loss": 0.1131, + "num_input_tokens_seen": 460528392, + "step": 4749 + }, + { + "epoch": 0.46431364880719594, + "loss": 0.08189444988965988, + "loss_ce": 0.004669711925089359, + "loss_iou": 0.408203125, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 460528392, + "step": 4749 + }, + { + "epoch": 0.4644114196323817, + "grad_norm": 3.612629359481738, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 460624656, + "step": 4750 + }, + { + "epoch": 0.4644114196323817, + "eval_seeclick_CIoU": 0.4540628343820572, + "eval_seeclick_GIoU": 0.45463666319847107, + "eval_seeclick_IoU": 0.5291394144296646, + "eval_seeclick_MAE_all": 0.11061718314886093, + "eval_seeclick_MAE_h": 0.038123659789562225, + "eval_seeclick_MAE_w": 0.18569032847881317, + "eval_seeclick_MAE_x": 0.18339194729924202, + "eval_seeclick_MAE_y": 0.03526279330253601, + "eval_seeclick_NUM_probability": 0.9999963939189911, + "eval_seeclick_inside_bbox": 0.6633522808551788, + "eval_seeclick_loss": 0.31023186445236206, + "eval_seeclick_loss_ce": 0.010236290283501148, + "eval_seeclick_loss_iou": 0.3936767578125, + "eval_seeclick_loss_num": 0.05760955810546875, + "eval_seeclick_loss_xval": 0.288116455078125, + "eval_seeclick_runtime": 77.6506, + "eval_seeclick_samples_per_second": 0.554, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 460624656, + "step": 4750 + }, + { + "epoch": 0.4644114196323817, + "eval_icons_CIoU": 0.6938662528991699, + "eval_icons_GIoU": 0.6805226802825928, + "eval_icons_IoU": 0.7243521511554718, + "eval_icons_MAE_all": 0.0585053451359272, + "eval_icons_MAE_h": 0.07239668071269989, + "eval_icons_MAE_w": 0.04652398265898228, + "eval_icons_MAE_x": 0.04790319316089153, + "eval_icons_MAE_y": 0.06719751469790936, + "eval_icons_NUM_probability": 0.9999969899654388, + "eval_icons_inside_bbox": 0.8229166567325592, + "eval_icons_loss": 0.17788799107074738, + "eval_icons_loss_ce": 6.9591583269357216e-06, + "eval_icons_loss_iou": 0.38714599609375, + "eval_icons_loss_num": 0.037799835205078125, + "eval_icons_loss_xval": 0.18891143798828125, + "eval_icons_runtime": 87.6188, + "eval_icons_samples_per_second": 0.571, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 460624656, + "step": 4750 + }, + { + "epoch": 0.4644114196323817, + "eval_screenspot_CIoU": 0.33290398120880127, + "eval_screenspot_GIoU": 0.3163748284180959, + "eval_screenspot_IoU": 0.4190041820208232, + "eval_screenspot_MAE_all": 0.15102512389421463, + "eval_screenspot_MAE_h": 0.12183316797018051, + "eval_screenspot_MAE_w": 0.19661287715037665, + "eval_screenspot_MAE_x": 0.17359466602404913, + "eval_screenspot_MAE_y": 0.11205977946519852, + "eval_screenspot_NUM_probability": 0.9999893506368002, + "eval_screenspot_inside_bbox": 0.7012499968210856, + "eval_screenspot_loss": 0.5657138824462891, + "eval_screenspot_loss_ce": 0.032527983809510864, + "eval_screenspot_loss_iou": 0.4058430989583333, + "eval_screenspot_loss_num": 0.107330322265625, + "eval_screenspot_loss_xval": 0.5366617838541666, + "eval_screenspot_runtime": 150.1978, + "eval_screenspot_samples_per_second": 0.593, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 460624656, + "step": 4750 + }, + { + "epoch": 0.4644114196323817, + "eval_compot_CIoU": 0.4726688116788864, + "eval_compot_GIoU": 0.4558645039796829, + "eval_compot_IoU": 0.537581592798233, + "eval_compot_MAE_all": 0.0939112938940525, + "eval_compot_MAE_h": 0.07405582070350647, + "eval_compot_MAE_w": 0.11772523075342178, + "eval_compot_MAE_x": 0.10765037313103676, + "eval_compot_MAE_y": 0.07621375098824501, + "eval_compot_NUM_probability": 0.9999862313270569, + "eval_compot_inside_bbox": 0.7083333432674408, + "eval_compot_loss": 0.2957334816455841, + "eval_compot_loss_ce": 0.017869800329208374, + "eval_compot_loss_iou": 0.41607666015625, + "eval_compot_loss_num": 0.049144744873046875, + "eval_compot_loss_xval": 0.245849609375, + "eval_compot_runtime": 87.89, + "eval_compot_samples_per_second": 0.569, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 460624656, + "step": 4750 + }, + { + "epoch": 0.4644114196323817, + "loss": 0.2387882024049759, + "loss_ce": 0.01863439381122589, + "loss_iou": 0.43359375, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 460624656, + "step": 4750 + }, + { + "epoch": 0.46450919045756744, + "grad_norm": 6.618023686643122, + "learning_rate": 5e-05, + "loss": 0.0553, + "num_input_tokens_seen": 460721088, + "step": 4751 + }, + { + "epoch": 0.46450919045756744, + "loss": 0.06534360349178314, + "loss_ce": 0.0036523237358778715, + "loss_iou": 0.42578125, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 460721088, + "step": 4751 + }, + { + "epoch": 0.46460696128275325, + "grad_norm": 3.813079983036629, + "learning_rate": 5e-05, + "loss": 0.0631, + "num_input_tokens_seen": 460819072, + "step": 4752 + }, + { + "epoch": 0.46460696128275325, + "loss": 0.0658172219991684, + "loss_ce": 0.0030883406288921833, + "loss_iou": 0.361328125, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 460819072, + "step": 4752 + }, + { + "epoch": 0.464704732107939, + "grad_norm": 12.583475790629096, + "learning_rate": 5e-05, + "loss": 0.0986, + "num_input_tokens_seen": 460915252, + "step": 4753 + }, + { + "epoch": 0.464704732107939, + "loss": 0.1116146445274353, + "loss_ce": 0.0029720631428062916, + "loss_iou": 0.2421875, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 460915252, + "step": 4753 + }, + { + "epoch": 0.46480250293312475, + "grad_norm": 6.401415675877144, + "learning_rate": 5e-05, + "loss": 0.0918, + "num_input_tokens_seen": 461012012, + "step": 4754 + }, + { + "epoch": 0.46480250293312475, + "loss": 0.06261493265628815, + "loss_ce": 0.005562312435358763, + "loss_iou": 0.19140625, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 461012012, + "step": 4754 + }, + { + "epoch": 0.4649002737583105, + "grad_norm": 9.627931741590391, + "learning_rate": 5e-05, + "loss": 0.1048, + "num_input_tokens_seen": 461108508, + "step": 4755 + }, + { + "epoch": 0.4649002737583105, + "loss": 0.11661231517791748, + "loss_ce": 0.006146306172013283, + "loss_iou": 0.2890625, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 461108508, + "step": 4755 + }, + { + "epoch": 0.4649980445834963, + "grad_norm": 5.539159016545395, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 461206620, + "step": 4756 + }, + { + "epoch": 0.4649980445834963, + "loss": 0.06698597967624664, + "loss_ce": 0.006103412248194218, + "loss_iou": 0.32421875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 461206620, + "step": 4756 + }, + { + "epoch": 0.46509581540868206, + "grad_norm": 5.495890953363275, + "learning_rate": 5e-05, + "loss": 0.1029, + "num_input_tokens_seen": 461303432, + "step": 4757 + }, + { + "epoch": 0.46509581540868206, + "loss": 0.10245378315448761, + "loss_ce": 0.0027376001235097647, + "loss_iou": 0.37109375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 461303432, + "step": 4757 + }, + { + "epoch": 0.4651935862338678, + "grad_norm": 6.953095190730633, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 461400104, + "step": 4758 + }, + { + "epoch": 0.4651935862338678, + "loss": 0.0491313561797142, + "loss_ce": 0.0033282884396612644, + "loss_iou": 0.3125, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 461400104, + "step": 4758 + }, + { + "epoch": 0.46529135705905356, + "grad_norm": 7.841927468256115, + "learning_rate": 5e-05, + "loss": 0.1111, + "num_input_tokens_seen": 461497124, + "step": 4759 + }, + { + "epoch": 0.46529135705905356, + "loss": 0.08129269629716873, + "loss_ce": 0.006356784608215094, + "loss_iou": 0.400390625, + "loss_num": 0.0150146484375, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 461497124, + "step": 4759 + }, + { + "epoch": 0.46538912788423936, + "grad_norm": 6.318782823116494, + "learning_rate": 5e-05, + "loss": 0.1006, + "num_input_tokens_seen": 461594044, + "step": 4760 + }, + { + "epoch": 0.46538912788423936, + "loss": 0.10143765062093735, + "loss_ce": 0.008442956022918224, + "loss_iou": 0.265625, + "loss_num": 0.0186767578125, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 461594044, + "step": 4760 + }, + { + "epoch": 0.4654868987094251, + "grad_norm": 2.9701078392966074, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 461691120, + "step": 4761 + }, + { + "epoch": 0.4654868987094251, + "loss": 0.0692017674446106, + "loss_ce": 0.005847272463142872, + "loss_iou": 0.30859375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 461691120, + "step": 4761 + }, + { + "epoch": 0.46558466953461086, + "grad_norm": 12.413353559490865, + "learning_rate": 5e-05, + "loss": 0.0566, + "num_input_tokens_seen": 461788900, + "step": 4762 + }, + { + "epoch": 0.46558466953461086, + "loss": 0.04290943965315819, + "loss_ce": 0.0016649311874061823, + "loss_iou": 0.33984375, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 461788900, + "step": 4762 + }, + { + "epoch": 0.4656824403597966, + "grad_norm": 6.216059182699431, + "learning_rate": 5e-05, + "loss": 0.0989, + "num_input_tokens_seen": 461885620, + "step": 4763 + }, + { + "epoch": 0.4656824403597966, + "loss": 0.07691483199596405, + "loss_ce": 0.002326057758182287, + "loss_iou": 0.318359375, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 461885620, + "step": 4763 + }, + { + "epoch": 0.4657802111849824, + "grad_norm": 7.229221126077564, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 461982452, + "step": 4764 + }, + { + "epoch": 0.4657802111849824, + "loss": 0.08350164443254471, + "loss_ce": 0.007566277869045734, + "loss_iou": 0.345703125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 461982452, + "step": 4764 + }, + { + "epoch": 0.46587798201016817, + "grad_norm": 12.097361566902485, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 462078868, + "step": 4765 + }, + { + "epoch": 0.46587798201016817, + "loss": 0.07511324435472488, + "loss_ce": 0.007654139772057533, + "loss_iou": 0.291015625, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 462078868, + "step": 4765 + }, + { + "epoch": 0.4659757528353539, + "grad_norm": 4.60663672459389, + "learning_rate": 5e-05, + "loss": 0.0973, + "num_input_tokens_seen": 462175280, + "step": 4766 + }, + { + "epoch": 0.4659757528353539, + "loss": 0.11253326386213303, + "loss_ce": 0.004073795396834612, + "loss_iou": 0.28515625, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 462175280, + "step": 4766 + }, + { + "epoch": 0.46607352366053967, + "grad_norm": 2.902494052883175, + "learning_rate": 5e-05, + "loss": 0.0927, + "num_input_tokens_seen": 462271856, + "step": 4767 + }, + { + "epoch": 0.46607352366053967, + "loss": 0.09631310403347015, + "loss_ce": 0.006591422017663717, + "loss_iou": 0.318359375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 462271856, + "step": 4767 + }, + { + "epoch": 0.4661712944857255, + "grad_norm": 2.926423486587589, + "learning_rate": 5e-05, + "loss": 0.1114, + "num_input_tokens_seen": 462369072, + "step": 4768 + }, + { + "epoch": 0.4661712944857255, + "loss": 0.09847606718540192, + "loss_ce": 0.004771840292960405, + "loss_iou": 0.302734375, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 462369072, + "step": 4768 + }, + { + "epoch": 0.4662690653109112, + "grad_norm": 4.013049742414296, + "learning_rate": 5e-05, + "loss": 0.0758, + "num_input_tokens_seen": 462465664, + "step": 4769 + }, + { + "epoch": 0.4662690653109112, + "loss": 0.0819011777639389, + "loss_ce": 0.004844295792281628, + "loss_iou": 0.275390625, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 462465664, + "step": 4769 + }, + { + "epoch": 0.466366836136097, + "grad_norm": 15.833440329047164, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 462563688, + "step": 4770 + }, + { + "epoch": 0.466366836136097, + "loss": 0.05842871218919754, + "loss_ce": 0.0036801777314394712, + "loss_iou": 0.41796875, + "loss_num": 0.010986328125, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 462563688, + "step": 4770 + }, + { + "epoch": 0.4664646069612828, + "grad_norm": 6.287373271830624, + "learning_rate": 5e-05, + "loss": 0.0921, + "num_input_tokens_seen": 462660536, + "step": 4771 + }, + { + "epoch": 0.4664646069612828, + "loss": 0.09389335662126541, + "loss_ce": 0.0035613272339105606, + "loss_iou": 0.412109375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 462660536, + "step": 4771 + }, + { + "epoch": 0.46656237778646853, + "grad_norm": 24.92252393381634, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 462757740, + "step": 4772 + }, + { + "epoch": 0.46656237778646853, + "loss": 0.09247703850269318, + "loss_ce": 0.006600569002330303, + "loss_iou": 0.322265625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 462757740, + "step": 4772 + }, + { + "epoch": 0.4666601486116543, + "grad_norm": 14.811998189310478, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 462853864, + "step": 4773 + }, + { + "epoch": 0.4666601486116543, + "loss": 0.06272153556346893, + "loss_ce": 0.006233502179384232, + "loss_iou": 0.29296875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 462853864, + "step": 4773 + }, + { + "epoch": 0.46675791943684003, + "grad_norm": 2.2868208173071634, + "learning_rate": 5e-05, + "loss": 0.0519, + "num_input_tokens_seen": 462950332, + "step": 4774 + }, + { + "epoch": 0.46675791943684003, + "loss": 0.04954300820827484, + "loss_ce": 0.004590614698827267, + "loss_iou": 0.244140625, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 462950332, + "step": 4774 + }, + { + "epoch": 0.46685569026202584, + "grad_norm": 9.634154059184727, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 463047628, + "step": 4775 + }, + { + "epoch": 0.46685569026202584, + "loss": 0.0798129290342331, + "loss_ce": 0.011854097247123718, + "loss_iou": 0.236328125, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 463047628, + "step": 4775 + }, + { + "epoch": 0.4669534610872116, + "grad_norm": 10.966858630878193, + "learning_rate": 5e-05, + "loss": 0.0633, + "num_input_tokens_seen": 463144996, + "step": 4776 + }, + { + "epoch": 0.4669534610872116, + "loss": 0.07490479946136475, + "loss_ce": 0.0045922924764454365, + "loss_iou": 0.330078125, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 463144996, + "step": 4776 + }, + { + "epoch": 0.46705123191239734, + "grad_norm": 3.360801725431858, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 463242772, + "step": 4777 + }, + { + "epoch": 0.46705123191239734, + "loss": 0.08777089416980743, + "loss_ce": 0.003214309923350811, + "loss_iou": 0.3671875, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 463242772, + "step": 4777 + }, + { + "epoch": 0.4671490027375831, + "grad_norm": 4.063417469355398, + "learning_rate": 5e-05, + "loss": 0.0709, + "num_input_tokens_seen": 463339676, + "step": 4778 + }, + { + "epoch": 0.4671490027375831, + "loss": 0.0685787945985794, + "loss_ce": 0.004270621575415134, + "loss_iou": 0.203125, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 463339676, + "step": 4778 + }, + { + "epoch": 0.4672467735627689, + "grad_norm": 4.471631917830815, + "learning_rate": 5e-05, + "loss": 0.0819, + "num_input_tokens_seen": 463436016, + "step": 4779 + }, + { + "epoch": 0.4672467735627689, + "loss": 0.08895444869995117, + "loss_ce": 0.003459460334852338, + "loss_iou": 0.34375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 463436016, + "step": 4779 + }, + { + "epoch": 0.46734454438795464, + "grad_norm": 9.588538287668067, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 463532492, + "step": 4780 + }, + { + "epoch": 0.46734454438795464, + "loss": 0.062169913202524185, + "loss_ce": 0.003744009882211685, + "loss_iou": 0.318359375, + "loss_num": 0.01171875, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 463532492, + "step": 4780 + }, + { + "epoch": 0.4674423152131404, + "grad_norm": 8.239744368300263, + "learning_rate": 5e-05, + "loss": 0.0677, + "num_input_tokens_seen": 463629296, + "step": 4781 + }, + { + "epoch": 0.4674423152131404, + "loss": 0.07995784282684326, + "loss_ce": 0.004831192549318075, + "loss_iou": 0.36328125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 463629296, + "step": 4781 + }, + { + "epoch": 0.46754008603832614, + "grad_norm": 4.027911347201271, + "learning_rate": 5e-05, + "loss": 0.0769, + "num_input_tokens_seen": 463725332, + "step": 4782 + }, + { + "epoch": 0.46754008603832614, + "loss": 0.08973740041255951, + "loss_ce": 0.0024571307003498077, + "loss_iou": 0.34765625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 463725332, + "step": 4782 + }, + { + "epoch": 0.46763785686351195, + "grad_norm": 4.302750448809955, + "learning_rate": 5e-05, + "loss": 0.0951, + "num_input_tokens_seen": 463822232, + "step": 4783 + }, + { + "epoch": 0.46763785686351195, + "loss": 0.13118046522140503, + "loss_ce": 0.006973921321332455, + "loss_iou": 0.388671875, + "loss_num": 0.02490234375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 463822232, + "step": 4783 + }, + { + "epoch": 0.4677356276886977, + "grad_norm": 6.183649714640931, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 463919612, + "step": 4784 + }, + { + "epoch": 0.4677356276886977, + "loss": 0.053697556257247925, + "loss_ce": 0.002740824595093727, + "loss_iou": 0.3359375, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 463919612, + "step": 4784 + }, + { + "epoch": 0.46783339851388345, + "grad_norm": 9.428355006855524, + "learning_rate": 5e-05, + "loss": 0.0986, + "num_input_tokens_seen": 464015336, + "step": 4785 + }, + { + "epoch": 0.46783339851388345, + "loss": 0.06919077038764954, + "loss_ce": 0.00856760237365961, + "loss_iou": 0.33203125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 464015336, + "step": 4785 + }, + { + "epoch": 0.4679311693390692, + "grad_norm": 16.27298305862701, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 464112048, + "step": 4786 + }, + { + "epoch": 0.4679311693390692, + "loss": 0.06198696047067642, + "loss_ce": 0.00248531112447381, + "loss_iou": 0.34765625, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 464112048, + "step": 4786 + }, + { + "epoch": 0.468028940164255, + "grad_norm": 3.7293731766733207, + "learning_rate": 5e-05, + "loss": 0.0585, + "num_input_tokens_seen": 464208536, + "step": 4787 + }, + { + "epoch": 0.468028940164255, + "loss": 0.056905943900346756, + "loss_ce": 0.010366635397076607, + "loss_iou": 0.37109375, + "loss_num": 0.00927734375, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 464208536, + "step": 4787 + }, + { + "epoch": 0.46812671098944075, + "grad_norm": 6.8826609248244175, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 464304840, + "step": 4788 + }, + { + "epoch": 0.46812671098944075, + "loss": 0.08361933380365372, + "loss_ce": 0.003266550600528717, + "loss_iou": 0.296875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 464304840, + "step": 4788 + }, + { + "epoch": 0.4682244818146265, + "grad_norm": 13.10049053511346, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 464401504, + "step": 4789 + }, + { + "epoch": 0.4682244818146265, + "loss": 0.08702833950519562, + "loss_ce": 0.00704176863655448, + "loss_iou": 0.33984375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 464401504, + "step": 4789 + }, + { + "epoch": 0.46832225263981225, + "grad_norm": 12.080806849232884, + "learning_rate": 5e-05, + "loss": 0.0695, + "num_input_tokens_seen": 464498460, + "step": 4790 + }, + { + "epoch": 0.46832225263981225, + "loss": 0.07019579410552979, + "loss_ce": 0.005574825685471296, + "loss_iou": 0.357421875, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 464498460, + "step": 4790 + }, + { + "epoch": 0.46842002346499806, + "grad_norm": 5.520099851403904, + "learning_rate": 5e-05, + "loss": 0.0851, + "num_input_tokens_seen": 464595372, + "step": 4791 + }, + { + "epoch": 0.46842002346499806, + "loss": 0.07687652856111526, + "loss_ce": 0.004641419276595116, + "loss_iou": 0.41015625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 464595372, + "step": 4791 + }, + { + "epoch": 0.4685177942901838, + "grad_norm": 3.703029468003232, + "learning_rate": 5e-05, + "loss": 0.0675, + "num_input_tokens_seen": 464692396, + "step": 4792 + }, + { + "epoch": 0.4685177942901838, + "loss": 0.07726892083883286, + "loss_ce": 0.00543054286390543, + "loss_iou": 0.251953125, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 464692396, + "step": 4792 + }, + { + "epoch": 0.46861556511536956, + "grad_norm": 12.885898776676694, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 464789420, + "step": 4793 + }, + { + "epoch": 0.46861556511536956, + "loss": 0.07775934040546417, + "loss_ce": 0.004685000516474247, + "loss_iou": 0.34375, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 464789420, + "step": 4793 + }, + { + "epoch": 0.46871333594055536, + "grad_norm": 34.42208987824474, + "learning_rate": 5e-05, + "loss": 0.0758, + "num_input_tokens_seen": 464886324, + "step": 4794 + }, + { + "epoch": 0.46871333594055536, + "loss": 0.0793372094631195, + "loss_ce": 0.004431816749274731, + "loss_iou": 0.326171875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 464886324, + "step": 4794 + }, + { + "epoch": 0.4688111067657411, + "grad_norm": 13.590372511691518, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 464982632, + "step": 4795 + }, + { + "epoch": 0.4688111067657411, + "loss": 0.082755908370018, + "loss_ce": 0.01091752853244543, + "loss_iou": 0.306640625, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 464982632, + "step": 4795 + }, + { + "epoch": 0.46890887759092686, + "grad_norm": 17.052103003265163, + "learning_rate": 5e-05, + "loss": 0.0846, + "num_input_tokens_seen": 465079288, + "step": 4796 + }, + { + "epoch": 0.46890887759092686, + "loss": 0.06303200125694275, + "loss_ce": 0.006322711706161499, + "loss_iou": 0.1865234375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 465079288, + "step": 4796 + }, + { + "epoch": 0.4690066484161126, + "grad_norm": 4.745649481417596, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 465176112, + "step": 4797 + }, + { + "epoch": 0.4690066484161126, + "loss": 0.12822966277599335, + "loss_ce": 0.0023827985860407352, + "loss_iou": 0.267578125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 465176112, + "step": 4797 + }, + { + "epoch": 0.4691044192412984, + "grad_norm": 8.306363941580928, + "learning_rate": 5e-05, + "loss": 0.1075, + "num_input_tokens_seen": 465272584, + "step": 4798 + }, + { + "epoch": 0.4691044192412984, + "loss": 0.1430719792842865, + "loss_ce": 0.004644251894205809, + "loss_iou": 0.23046875, + "loss_num": 0.027587890625, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 465272584, + "step": 4798 + }, + { + "epoch": 0.46920219006648417, + "grad_norm": 3.8065678333445714, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 465370088, + "step": 4799 + }, + { + "epoch": 0.46920219006648417, + "loss": 0.06237826123833656, + "loss_ce": 0.005829188972711563, + "loss_iou": 0.376953125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 465370088, + "step": 4799 + }, + { + "epoch": 0.4692999608916699, + "grad_norm": 5.000443764285881, + "learning_rate": 5e-05, + "loss": 0.0958, + "num_input_tokens_seen": 465467448, + "step": 4800 + }, + { + "epoch": 0.4692999608916699, + "loss": 0.12276337295770645, + "loss_ce": 0.0035159396938979626, + "loss_iou": 0.33203125, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 465467448, + "step": 4800 + }, + { + "epoch": 0.46939773171685567, + "grad_norm": 4.666078543706112, + "learning_rate": 5e-05, + "loss": 0.0563, + "num_input_tokens_seen": 465563720, + "step": 4801 + }, + { + "epoch": 0.46939773171685567, + "loss": 0.06766408681869507, + "loss_ce": 0.009924830868840218, + "loss_iou": 0.271484375, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 465563720, + "step": 4801 + }, + { + "epoch": 0.4694955025420415, + "grad_norm": 6.561839146595181, + "learning_rate": 5e-05, + "loss": 0.0775, + "num_input_tokens_seen": 465661252, + "step": 4802 + }, + { + "epoch": 0.4694955025420415, + "loss": 0.0747617855668068, + "loss_ce": 0.005792062729597092, + "loss_iou": 0.28125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 465661252, + "step": 4802 + }, + { + "epoch": 0.4695932733672272, + "grad_norm": 15.931808539092104, + "learning_rate": 5e-05, + "loss": 0.0577, + "num_input_tokens_seen": 465758100, + "step": 4803 + }, + { + "epoch": 0.4695932733672272, + "loss": 0.06313414126634598, + "loss_ce": 0.008675524964928627, + "loss_iou": 0.224609375, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 465758100, + "step": 4803 + }, + { + "epoch": 0.469691044192413, + "grad_norm": 30.967656910360905, + "learning_rate": 5e-05, + "loss": 0.0954, + "num_input_tokens_seen": 465855196, + "step": 4804 + }, + { + "epoch": 0.469691044192413, + "loss": 0.07668473571538925, + "loss_ce": 0.0033014058135449886, + "loss_iou": 0.328125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 465855196, + "step": 4804 + }, + { + "epoch": 0.4697888150175987, + "grad_norm": 13.124929729298385, + "learning_rate": 5e-05, + "loss": 0.1051, + "num_input_tokens_seen": 465952368, + "step": 4805 + }, + { + "epoch": 0.4697888150175987, + "loss": 0.09814776480197906, + "loss_ce": 0.004992859438061714, + "loss_iou": 0.341796875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 465952368, + "step": 4805 + }, + { + "epoch": 0.46988658584278453, + "grad_norm": 7.80762248858206, + "learning_rate": 5e-05, + "loss": 0.0651, + "num_input_tokens_seen": 466050248, + "step": 4806 + }, + { + "epoch": 0.46988658584278453, + "loss": 0.05547301471233368, + "loss_ce": 0.004909201059490442, + "loss_iou": 0.25, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 466050248, + "step": 4806 + }, + { + "epoch": 0.4699843566679703, + "grad_norm": 11.337132119617234, + "learning_rate": 5e-05, + "loss": 0.0896, + "num_input_tokens_seen": 466146880, + "step": 4807 + }, + { + "epoch": 0.4699843566679703, + "loss": 0.10685323178768158, + "loss_ce": 0.002483110409229994, + "loss_iou": 0.41796875, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 466146880, + "step": 4807 + }, + { + "epoch": 0.47008212749315603, + "grad_norm": 26.668388873025698, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 466243844, + "step": 4808 + }, + { + "epoch": 0.47008212749315603, + "loss": 0.0737912729382515, + "loss_ce": 0.002349618123844266, + "loss_iou": 0.2734375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 466243844, + "step": 4808 + }, + { + "epoch": 0.4701798983183418, + "grad_norm": 15.33219825828523, + "learning_rate": 5e-05, + "loss": 0.0865, + "num_input_tokens_seen": 466340976, + "step": 4809 + }, + { + "epoch": 0.4701798983183418, + "loss": 0.08760903775691986, + "loss_ce": 0.0017173195956274867, + "loss_iou": 0.41796875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 466340976, + "step": 4809 + }, + { + "epoch": 0.4702776691435276, + "grad_norm": 21.817962849180336, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 466438228, + "step": 4810 + }, + { + "epoch": 0.4702776691435276, + "loss": 0.07228583097457886, + "loss_ce": 0.004414735361933708, + "loss_iou": 0.337890625, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 466438228, + "step": 4810 + }, + { + "epoch": 0.47037543996871334, + "grad_norm": 15.227068333869932, + "learning_rate": 5e-05, + "loss": 0.0908, + "num_input_tokens_seen": 466534080, + "step": 4811 + }, + { + "epoch": 0.47037543996871334, + "loss": 0.08490589261054993, + "loss_ce": 0.00423267250880599, + "loss_iou": 0.34375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 466534080, + "step": 4811 + }, + { + "epoch": 0.4704732107938991, + "grad_norm": 3.2440489081522674, + "learning_rate": 5e-05, + "loss": 0.0582, + "num_input_tokens_seen": 466630412, + "step": 4812 + }, + { + "epoch": 0.4704732107938991, + "loss": 0.0711226835846901, + "loss_ce": 0.006593262776732445, + "loss_iou": 0.25390625, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 466630412, + "step": 4812 + }, + { + "epoch": 0.47057098161908484, + "grad_norm": 19.11110359026292, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 466727848, + "step": 4813 + }, + { + "epoch": 0.47057098161908484, + "loss": 0.04920012876391411, + "loss_ce": 0.003233028342947364, + "loss_iou": 0.384765625, + "loss_num": 0.00921630859375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 466727848, + "step": 4813 + }, + { + "epoch": 0.47066875244427064, + "grad_norm": 7.624990992575963, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 466824128, + "step": 4814 + }, + { + "epoch": 0.47066875244427064, + "loss": 0.06137806549668312, + "loss_ce": 0.0022655166685581207, + "loss_iou": 0.283203125, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 466824128, + "step": 4814 + }, + { + "epoch": 0.4707665232694564, + "grad_norm": 2.9009388423856386, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 466920540, + "step": 4815 + }, + { + "epoch": 0.4707665232694564, + "loss": 0.04066689684987068, + "loss_ce": 0.0017340956255793571, + "loss_iou": 0.2021484375, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 466920540, + "step": 4815 + }, + { + "epoch": 0.47086429409464214, + "grad_norm": 19.201626237580857, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 467018984, + "step": 4816 + }, + { + "epoch": 0.47086429409464214, + "loss": 0.0712396577000618, + "loss_ce": 0.003948397934436798, + "loss_iou": 0.328125, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 467018984, + "step": 4816 + }, + { + "epoch": 0.47096206491982795, + "grad_norm": 24.663830464661807, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 467116412, + "step": 4817 + }, + { + "epoch": 0.47096206491982795, + "loss": 0.05862634629011154, + "loss_ce": 0.0012838179245591164, + "loss_iou": 0.310546875, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 467116412, + "step": 4817 + }, + { + "epoch": 0.4710598357450137, + "grad_norm": 13.446418420124699, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 467213788, + "step": 4818 + }, + { + "epoch": 0.4710598357450137, + "loss": 0.05870208144187927, + "loss_ce": 0.004930112510919571, + "loss_iou": 0.27734375, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 467213788, + "step": 4818 + }, + { + "epoch": 0.47115760657019945, + "grad_norm": 10.614463190075668, + "learning_rate": 5e-05, + "loss": 0.1062, + "num_input_tokens_seen": 467310336, + "step": 4819 + }, + { + "epoch": 0.47115760657019945, + "loss": 0.07433022558689117, + "loss_ce": 0.011174092069268227, + "loss_iou": 0.26171875, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 467310336, + "step": 4819 + }, + { + "epoch": 0.4712553773953852, + "grad_norm": 6.128296564425482, + "learning_rate": 5e-05, + "loss": 0.1085, + "num_input_tokens_seen": 467406992, + "step": 4820 + }, + { + "epoch": 0.4712553773953852, + "loss": 0.11619681119918823, + "loss_ce": 0.004655062686651945, + "loss_iou": 0.255859375, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 467406992, + "step": 4820 + }, + { + "epoch": 0.471353148220571, + "grad_norm": 16.921191218780216, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 467503512, + "step": 4821 + }, + { + "epoch": 0.471353148220571, + "loss": 0.0738513246178627, + "loss_ce": 0.004347539506852627, + "loss_iou": 0.251953125, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 467503512, + "step": 4821 + }, + { + "epoch": 0.47145091904575676, + "grad_norm": 13.304230550388846, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 467600592, + "step": 4822 + }, + { + "epoch": 0.47145091904575676, + "loss": 0.0882602334022522, + "loss_ce": 0.003490028902888298, + "loss_iou": 0.33203125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 467600592, + "step": 4822 + }, + { + "epoch": 0.4715486898709425, + "grad_norm": 21.625896719271857, + "learning_rate": 5e-05, + "loss": 0.1061, + "num_input_tokens_seen": 467698348, + "step": 4823 + }, + { + "epoch": 0.4715486898709425, + "loss": 0.09558075666427612, + "loss_ce": 0.00540131377056241, + "loss_iou": 0.3203125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 467698348, + "step": 4823 + }, + { + "epoch": 0.47164646069612826, + "grad_norm": 10.614767111125495, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 467794740, + "step": 4824 + }, + { + "epoch": 0.47164646069612826, + "loss": 0.057466086000204086, + "loss_ce": 0.007638510782271624, + "loss_iou": 0.365234375, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 467794740, + "step": 4824 + }, + { + "epoch": 0.47174423152131406, + "grad_norm": 4.35243151508797, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 467892248, + "step": 4825 + }, + { + "epoch": 0.47174423152131406, + "loss": 0.08957359939813614, + "loss_ce": 0.008702021092176437, + "loss_iou": 0.296875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 467892248, + "step": 4825 + }, + { + "epoch": 0.4718420023464998, + "grad_norm": 4.802858280337577, + "learning_rate": 5e-05, + "loss": 0.0481, + "num_input_tokens_seen": 467989324, + "step": 4826 + }, + { + "epoch": 0.4718420023464998, + "loss": 0.050623733550310135, + "loss_ce": 0.010828808881342411, + "loss_iou": 0.3359375, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 467989324, + "step": 4826 + }, + { + "epoch": 0.47193977317168556, + "grad_norm": 4.268722234986265, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 468084692, + "step": 4827 + }, + { + "epoch": 0.47193977317168556, + "loss": 0.053895920515060425, + "loss_ce": 0.00459858775138855, + "loss_iou": 0.21875, + "loss_num": 0.00982666015625, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 468084692, + "step": 4827 + }, + { + "epoch": 0.4720375439968713, + "grad_norm": 2.5149539613488305, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 468181088, + "step": 4828 + }, + { + "epoch": 0.4720375439968713, + "loss": 0.0903373509645462, + "loss_ce": 0.0033241077326238155, + "loss_iou": 0.267578125, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 468181088, + "step": 4828 + }, + { + "epoch": 0.4721353148220571, + "grad_norm": 8.067838946853373, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 468277296, + "step": 4829 + }, + { + "epoch": 0.4721353148220571, + "loss": 0.09522604942321777, + "loss_ce": 0.0065190731547772884, + "loss_iou": 0.291015625, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 468277296, + "step": 4829 + }, + { + "epoch": 0.47223308564724287, + "grad_norm": 9.700487611920122, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 468374144, + "step": 4830 + }, + { + "epoch": 0.47223308564724287, + "loss": 0.05726049095392227, + "loss_ce": 0.006296137347817421, + "loss_iou": 0.384765625, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 468374144, + "step": 4830 + }, + { + "epoch": 0.4723308564724286, + "grad_norm": 4.91071935859529, + "learning_rate": 5e-05, + "loss": 0.0452, + "num_input_tokens_seen": 468470512, + "step": 4831 + }, + { + "epoch": 0.4723308564724286, + "loss": 0.03988214209675789, + "loss_ce": 0.003108459059149027, + "loss_iou": 0.357421875, + "loss_num": 0.007354736328125, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 468470512, + "step": 4831 + }, + { + "epoch": 0.47242862729761437, + "grad_norm": 10.693422426823936, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 468567172, + "step": 4832 + }, + { + "epoch": 0.47242862729761437, + "loss": 0.06616408377885818, + "loss_ce": 0.006563254632055759, + "loss_iou": 0.2353515625, + "loss_num": 0.011962890625, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 468567172, + "step": 4832 + }, + { + "epoch": 0.4725263981228002, + "grad_norm": 5.174713247919707, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 468665024, + "step": 4833 + }, + { + "epoch": 0.4725263981228002, + "loss": 0.08941510319709778, + "loss_ce": 0.004568604286760092, + "loss_iou": 0.28125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 468665024, + "step": 4833 + }, + { + "epoch": 0.4726241689479859, + "grad_norm": 19.214194095189598, + "learning_rate": 5e-05, + "loss": 0.0889, + "num_input_tokens_seen": 468762100, + "step": 4834 + }, + { + "epoch": 0.4726241689479859, + "loss": 0.06054503098130226, + "loss_ce": 0.006483140867203474, + "loss_iou": 0.330078125, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 468762100, + "step": 4834 + }, + { + "epoch": 0.4727219397731717, + "grad_norm": 16.698491168078448, + "learning_rate": 5e-05, + "loss": 0.0994, + "num_input_tokens_seen": 468858856, + "step": 4835 + }, + { + "epoch": 0.4727219397731717, + "loss": 0.08483365923166275, + "loss_ce": 0.004007854498922825, + "loss_iou": 0.2353515625, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 468858856, + "step": 4835 + }, + { + "epoch": 0.4728197105983574, + "grad_norm": 1.7816859806721592, + "learning_rate": 5e-05, + "loss": 0.06, + "num_input_tokens_seen": 468954500, + "step": 4836 + }, + { + "epoch": 0.4728197105983574, + "loss": 0.06269221752882004, + "loss_ce": 0.007539327256381512, + "loss_iou": 0.2255859375, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 468954500, + "step": 4836 + }, + { + "epoch": 0.47291748142354323, + "grad_norm": 6.368776289683657, + "learning_rate": 5e-05, + "loss": 0.0626, + "num_input_tokens_seen": 469051976, + "step": 4837 + }, + { + "epoch": 0.47291748142354323, + "loss": 0.05762393772602081, + "loss_ce": 0.0043249912559986115, + "loss_iou": 0.26953125, + "loss_num": 0.01068115234375, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 469051976, + "step": 4837 + }, + { + "epoch": 0.473015252248729, + "grad_norm": 14.854446089761325, + "learning_rate": 5e-05, + "loss": 0.0619, + "num_input_tokens_seen": 469148844, + "step": 4838 + }, + { + "epoch": 0.473015252248729, + "loss": 0.06663677841424942, + "loss_ce": 0.007035946007817984, + "loss_iou": 0.3046875, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 469148844, + "step": 4838 + }, + { + "epoch": 0.47311302307391473, + "grad_norm": 7.151631582932838, + "learning_rate": 5e-05, + "loss": 0.0854, + "num_input_tokens_seen": 469246432, + "step": 4839 + }, + { + "epoch": 0.47311302307391473, + "loss": 0.10268320143222809, + "loss_ce": 0.0054389433935284615, + "loss_iou": 0.322265625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 469246432, + "step": 4839 + }, + { + "epoch": 0.47321079389910053, + "grad_norm": 8.065736544909445, + "learning_rate": 5e-05, + "loss": 0.0773, + "num_input_tokens_seen": 469343656, + "step": 4840 + }, + { + "epoch": 0.47321079389910053, + "loss": 0.09003793448209763, + "loss_ce": 0.012813203036785126, + "loss_iou": 0.20703125, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 469343656, + "step": 4840 + }, + { + "epoch": 0.4733085647242863, + "grad_norm": 26.46763559739903, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 469441340, + "step": 4841 + }, + { + "epoch": 0.4733085647242863, + "loss": 0.06555791199207306, + "loss_ce": 0.005087326280772686, + "loss_iou": 0.3359375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 469441340, + "step": 4841 + }, + { + "epoch": 0.47340633554947203, + "grad_norm": 4.547306513628468, + "learning_rate": 5e-05, + "loss": 0.1024, + "num_input_tokens_seen": 469538148, + "step": 4842 + }, + { + "epoch": 0.47340633554947203, + "loss": 0.12292700260877609, + "loss_ce": 0.006471922155469656, + "loss_iou": 0.267578125, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 469538148, + "step": 4842 + }, + { + "epoch": 0.4735041063746578, + "grad_norm": 8.364231296956802, + "learning_rate": 5e-05, + "loss": 0.0594, + "num_input_tokens_seen": 469635016, + "step": 4843 + }, + { + "epoch": 0.4735041063746578, + "loss": 0.049038030207157135, + "loss_ce": 0.0059929825365543365, + "loss_iou": 0.27734375, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 469635016, + "step": 4843 + }, + { + "epoch": 0.4736018771998436, + "grad_norm": 6.962208329763514, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 469732000, + "step": 4844 + }, + { + "epoch": 0.4736018771998436, + "loss": 0.08514256775379181, + "loss_ce": 0.006002862937748432, + "loss_iou": 0.291015625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 469732000, + "step": 4844 + }, + { + "epoch": 0.47369964802502934, + "grad_norm": 2.1484372077112717, + "learning_rate": 5e-05, + "loss": 0.0386, + "num_input_tokens_seen": 469829680, + "step": 4845 + }, + { + "epoch": 0.47369964802502934, + "loss": 0.03176019713282585, + "loss_ce": 0.0045308866538107395, + "loss_iou": 0.265625, + "loss_num": 0.00543212890625, + "loss_xval": 0.0272216796875, + "num_input_tokens_seen": 469829680, + "step": 4845 + }, + { + "epoch": 0.4737974188502151, + "grad_norm": 3.61490283933823, + "learning_rate": 5e-05, + "loss": 0.102, + "num_input_tokens_seen": 469926680, + "step": 4846 + }, + { + "epoch": 0.4737974188502151, + "loss": 0.08459649980068207, + "loss_ce": 0.013337954878807068, + "loss_iou": 0.26953125, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 469926680, + "step": 4846 + }, + { + "epoch": 0.47389518967540084, + "grad_norm": 6.713434159262737, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 470023224, + "step": 4847 + }, + { + "epoch": 0.47389518967540084, + "loss": 0.06907180696725845, + "loss_ce": 0.006159821525216103, + "loss_iou": 0.28515625, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 470023224, + "step": 4847 + }, + { + "epoch": 0.47399296050058665, + "grad_norm": 12.182481522789697, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 470119452, + "step": 4848 + }, + { + "epoch": 0.47399296050058665, + "loss": 0.08776858448982239, + "loss_ce": 0.005744969006627798, + "loss_iou": 0.2734375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 470119452, + "step": 4848 + }, + { + "epoch": 0.4740907313257724, + "grad_norm": 2.218155558391472, + "learning_rate": 5e-05, + "loss": 0.083, + "num_input_tokens_seen": 470216548, + "step": 4849 + }, + { + "epoch": 0.4740907313257724, + "loss": 0.08980008214712143, + "loss_ce": 0.006036962848156691, + "loss_iou": 0.26953125, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 470216548, + "step": 4849 + }, + { + "epoch": 0.47418850215095815, + "grad_norm": 9.077673223746187, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 470314056, + "step": 4850 + }, + { + "epoch": 0.47418850215095815, + "loss": 0.107411690056324, + "loss_ce": 0.007878606207668781, + "loss_iou": 0.349609375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 470314056, + "step": 4850 + }, + { + "epoch": 0.4742862729761439, + "grad_norm": 15.519753826044392, + "learning_rate": 5e-05, + "loss": 0.1273, + "num_input_tokens_seen": 470411416, + "step": 4851 + }, + { + "epoch": 0.4742862729761439, + "loss": 0.13345742225646973, + "loss_ce": 0.0036051373463124037, + "loss_iou": 0.328125, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 470411416, + "step": 4851 + }, + { + "epoch": 0.4743840438013297, + "grad_norm": 22.2212492412285, + "learning_rate": 5e-05, + "loss": 0.1204, + "num_input_tokens_seen": 470509048, + "step": 4852 + }, + { + "epoch": 0.4743840438013297, + "loss": 0.16427581012248993, + "loss_ce": 0.0028988549020141363, + "loss_iou": 0.37890625, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 470509048, + "step": 4852 + }, + { + "epoch": 0.47448181462651545, + "grad_norm": 11.203871154069114, + "learning_rate": 5e-05, + "loss": 0.0771, + "num_input_tokens_seen": 470606028, + "step": 4853 + }, + { + "epoch": 0.47448181462651545, + "loss": 0.06219400465488434, + "loss_ce": 0.007247102912515402, + "loss_iou": 0.421875, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 470606028, + "step": 4853 + }, + { + "epoch": 0.4745795854517012, + "grad_norm": 5.8000775378560006, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 470703292, + "step": 4854 + }, + { + "epoch": 0.4745795854517012, + "loss": 0.06192086264491081, + "loss_ce": 0.003494958858937025, + "loss_iou": 0.32421875, + "loss_num": 0.01171875, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 470703292, + "step": 4854 + }, + { + "epoch": 0.47467735627688695, + "grad_norm": 12.539872246606272, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 470800704, + "step": 4855 + }, + { + "epoch": 0.47467735627688695, + "loss": 0.08913979679346085, + "loss_ce": 0.005826810374855995, + "loss_iou": 0.330078125, + "loss_num": 0.0166015625, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 470800704, + "step": 4855 + }, + { + "epoch": 0.47477512710207276, + "grad_norm": 28.986490462424253, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 470896844, + "step": 4856 + }, + { + "epoch": 0.47477512710207276, + "loss": 0.07078960537910461, + "loss_ce": 0.0027201438788324594, + "loss_iou": 0.28515625, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 470896844, + "step": 4856 + }, + { + "epoch": 0.4748728979272585, + "grad_norm": 12.929979446298587, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 470993584, + "step": 4857 + }, + { + "epoch": 0.4748728979272585, + "loss": 0.0876481682062149, + "loss_ce": 0.0052049290388822556, + "loss_iou": 0.279296875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 470993584, + "step": 4857 + }, + { + "epoch": 0.47497066875244426, + "grad_norm": 5.058423464489702, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 471090408, + "step": 4858 + }, + { + "epoch": 0.47497066875244426, + "loss": 0.07039466500282288, + "loss_ce": 0.008382948115468025, + "loss_iou": 0.3046875, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 471090408, + "step": 4858 + }, + { + "epoch": 0.47506843957763, + "grad_norm": 4.645489025938778, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 471186940, + "step": 4859 + }, + { + "epoch": 0.47506843957763, + "loss": 0.05064793676137924, + "loss_ce": 0.007267199456691742, + "loss_iou": 0.39453125, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 471186940, + "step": 4859 + }, + { + "epoch": 0.4751662104028158, + "grad_norm": 3.4281226365026995, + "learning_rate": 5e-05, + "loss": 0.0529, + "num_input_tokens_seen": 471283676, + "step": 4860 + }, + { + "epoch": 0.4751662104028158, + "loss": 0.050532016903162, + "loss_ce": 0.005152377299964428, + "loss_iou": 0.26171875, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 471283676, + "step": 4860 + }, + { + "epoch": 0.47526398122800156, + "grad_norm": 3.6584052479353786, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 471379772, + "step": 4861 + }, + { + "epoch": 0.47526398122800156, + "loss": 0.09881246089935303, + "loss_ce": 0.004421598743647337, + "loss_iou": 0.33203125, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 471379772, + "step": 4861 + }, + { + "epoch": 0.4753617520531873, + "grad_norm": 8.838469193881394, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 471477000, + "step": 4862 + }, + { + "epoch": 0.4753617520531873, + "loss": 0.07347303628921509, + "loss_ce": 0.007364329881966114, + "loss_iou": 0.275390625, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 471477000, + "step": 4862 + }, + { + "epoch": 0.4754595228783731, + "grad_norm": 6.8839780554785115, + "learning_rate": 5e-05, + "loss": 0.077, + "num_input_tokens_seen": 471573880, + "step": 4863 + }, + { + "epoch": 0.4754595228783731, + "loss": 0.08990463614463806, + "loss_ce": 0.007903908379375935, + "loss_iou": 0.1826171875, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 471573880, + "step": 4863 + }, + { + "epoch": 0.47555729370355887, + "grad_norm": 8.155409599758398, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 471670964, + "step": 4864 + }, + { + "epoch": 0.47555729370355887, + "loss": 0.06885938346385956, + "loss_ce": 0.0038721987511962652, + "loss_iou": 0.228515625, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 471670964, + "step": 4864 + }, + { + "epoch": 0.4756550645287446, + "grad_norm": 4.636283217545233, + "learning_rate": 5e-05, + "loss": 0.0566, + "num_input_tokens_seen": 471768372, + "step": 4865 + }, + { + "epoch": 0.4756550645287446, + "loss": 0.04935769364237785, + "loss_ce": 0.003917018882930279, + "loss_iou": 0.2490234375, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 471768372, + "step": 4865 + }, + { + "epoch": 0.47575283535393037, + "grad_norm": 12.778962027367442, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 471866132, + "step": 4866 + }, + { + "epoch": 0.47575283535393037, + "loss": 0.08642520010471344, + "loss_ce": 0.0063318214379251, + "loss_iou": 0.3671875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 471866132, + "step": 4866 + }, + { + "epoch": 0.4758506061791162, + "grad_norm": 6.6088167337976085, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 471963496, + "step": 4867 + }, + { + "epoch": 0.4758506061791162, + "loss": 0.07679983228445053, + "loss_ce": 0.005297147203236818, + "loss_iou": 0.3828125, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 471963496, + "step": 4867 + }, + { + "epoch": 0.4759483770043019, + "grad_norm": 6.508889828808363, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 472060728, + "step": 4868 + }, + { + "epoch": 0.4759483770043019, + "loss": 0.07321751117706299, + "loss_ce": 0.004110460169613361, + "loss_iou": 0.271484375, + "loss_num": 0.0137939453125, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 472060728, + "step": 4868 + }, + { + "epoch": 0.4760461478294877, + "grad_norm": 13.863809035485978, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 472158160, + "step": 4869 + }, + { + "epoch": 0.4760461478294877, + "loss": 0.08268355578184128, + "loss_ce": 0.0032463015522807837, + "loss_iou": 0.375, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 472158160, + "step": 4869 + }, + { + "epoch": 0.4761439186546734, + "grad_norm": 9.129969077438858, + "learning_rate": 5e-05, + "loss": 0.1054, + "num_input_tokens_seen": 472255496, + "step": 4870 + }, + { + "epoch": 0.4761439186546734, + "loss": 0.09023429453372955, + "loss_ce": 0.002343671629205346, + "loss_iou": 0.236328125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 472255496, + "step": 4870 + }, + { + "epoch": 0.47624168947985923, + "grad_norm": 4.981698450974923, + "learning_rate": 5e-05, + "loss": 0.0966, + "num_input_tokens_seen": 472351984, + "step": 4871 + }, + { + "epoch": 0.47624168947985923, + "loss": 0.1157807856798172, + "loss_ce": 0.010640094988048077, + "loss_iou": 0.328125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 472351984, + "step": 4871 + }, + { + "epoch": 0.476339460305045, + "grad_norm": 11.736995400928864, + "learning_rate": 5e-05, + "loss": 0.075, + "num_input_tokens_seen": 472448548, + "step": 4872 + }, + { + "epoch": 0.476339460305045, + "loss": 0.08340056240558624, + "loss_ce": 0.009944749996066093, + "loss_iou": 0.3203125, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 472448548, + "step": 4872 + }, + { + "epoch": 0.47643723113023073, + "grad_norm": 11.011913823270511, + "learning_rate": 5e-05, + "loss": 0.0875, + "num_input_tokens_seen": 472546028, + "step": 4873 + }, + { + "epoch": 0.47643723113023073, + "loss": 0.07381413877010345, + "loss_ce": 0.0047223446890711784, + "loss_iou": 0.248046875, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 472546028, + "step": 4873 + }, + { + "epoch": 0.4765350019554165, + "grad_norm": 5.498204779260623, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 472642124, + "step": 4874 + }, + { + "epoch": 0.4765350019554165, + "loss": 0.10264179110527039, + "loss_ce": 0.0019032591953873634, + "loss_iou": 0.296875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 472642124, + "step": 4874 + }, + { + "epoch": 0.4766327727806023, + "grad_norm": 3.351536332459447, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 472739388, + "step": 4875 + }, + { + "epoch": 0.4766327727806023, + "loss": 0.09795276820659637, + "loss_ce": 0.007834356278181076, + "loss_iou": 0.333984375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 472739388, + "step": 4875 + }, + { + "epoch": 0.47673054360578804, + "grad_norm": 3.2768392630253738, + "learning_rate": 5e-05, + "loss": 0.0681, + "num_input_tokens_seen": 472836212, + "step": 4876 + }, + { + "epoch": 0.47673054360578804, + "loss": 0.08005623519420624, + "loss_ce": 0.008034750819206238, + "loss_iou": 0.2255859375, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 472836212, + "step": 4876 + }, + { + "epoch": 0.4768283144309738, + "grad_norm": 7.734671925003843, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 472933588, + "step": 4877 + }, + { + "epoch": 0.4768283144309738, + "loss": 0.06852488219738007, + "loss_ce": 0.005422164686024189, + "loss_iou": 0.283203125, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 472933588, + "step": 4877 + }, + { + "epoch": 0.47692608525615954, + "grad_norm": 9.130027092089314, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 473030376, + "step": 4878 + }, + { + "epoch": 0.47692608525615954, + "loss": 0.10709856450557709, + "loss_ce": 0.007168750278651714, + "loss_iou": 0.28515625, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 473030376, + "step": 4878 + }, + { + "epoch": 0.47702385608134534, + "grad_norm": 5.8275451237859555, + "learning_rate": 5e-05, + "loss": 0.1094, + "num_input_tokens_seen": 473128272, + "step": 4879 + }, + { + "epoch": 0.47702385608134534, + "loss": 0.099812351167202, + "loss_ce": 0.0035293959081172943, + "loss_iou": 0.451171875, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 473128272, + "step": 4879 + }, + { + "epoch": 0.4771216269065311, + "grad_norm": 17.991464885975464, + "learning_rate": 5e-05, + "loss": 0.0893, + "num_input_tokens_seen": 473225204, + "step": 4880 + }, + { + "epoch": 0.4771216269065311, + "loss": 0.09971830248832703, + "loss_ce": 0.003771031042560935, + "loss_iou": 0.359375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 473225204, + "step": 4880 + }, + { + "epoch": 0.47721939773171684, + "grad_norm": 10.366617837361426, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 473321264, + "step": 4881 + }, + { + "epoch": 0.47721939773171684, + "loss": 0.10404050350189209, + "loss_ce": 0.008955358527600765, + "loss_iou": 0.240234375, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 473321264, + "step": 4881 + }, + { + "epoch": 0.4773171685569026, + "grad_norm": 12.12487576515238, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 473418224, + "step": 4882 + }, + { + "epoch": 0.4773171685569026, + "loss": 0.07146254926919937, + "loss_ce": 0.002981105586513877, + "loss_iou": 0.322265625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 473418224, + "step": 4882 + }, + { + "epoch": 0.4774149393820884, + "grad_norm": 8.586908752465966, + "learning_rate": 5e-05, + "loss": 0.1075, + "num_input_tokens_seen": 473514848, + "step": 4883 + }, + { + "epoch": 0.4774149393820884, + "loss": 0.08488201349973679, + "loss_ce": 0.004147762898355722, + "loss_iou": 0.302734375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 473514848, + "step": 4883 + }, + { + "epoch": 0.47751271020727415, + "grad_norm": 6.890624092436724, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 473611116, + "step": 4884 + }, + { + "epoch": 0.47751271020727415, + "loss": 0.06647831946611404, + "loss_ce": 0.004321638494729996, + "loss_iou": 0.259765625, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 473611116, + "step": 4884 + }, + { + "epoch": 0.4776104810324599, + "grad_norm": 11.086164579299973, + "learning_rate": 5e-05, + "loss": 0.0982, + "num_input_tokens_seen": 473708852, + "step": 4885 + }, + { + "epoch": 0.4776104810324599, + "loss": 0.11367820203304291, + "loss_ce": 0.0033876681700348854, + "loss_iou": 0.33203125, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 473708852, + "step": 4885 + }, + { + "epoch": 0.4777082518576457, + "grad_norm": 17.703164528359498, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 473805420, + "step": 4886 + }, + { + "epoch": 0.4777082518576457, + "loss": 0.10282152146100998, + "loss_ce": 0.006096056196838617, + "loss_iou": 0.349609375, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 473805420, + "step": 4886 + }, + { + "epoch": 0.47780602268283145, + "grad_norm": 9.727819561819745, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 473903176, + "step": 4887 + }, + { + "epoch": 0.47780602268283145, + "loss": 0.08457213640213013, + "loss_ce": 0.004288018681108952, + "loss_iou": 0.421875, + "loss_num": 0.01611328125, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 473903176, + "step": 4887 + }, + { + "epoch": 0.4779037935080172, + "grad_norm": 7.9352148048282265, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 474000712, + "step": 4888 + }, + { + "epoch": 0.4779037935080172, + "loss": 0.04879060015082359, + "loss_ce": 0.005730299279093742, + "loss_iou": 0.27734375, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 474000712, + "step": 4888 + }, + { + "epoch": 0.47800156433320296, + "grad_norm": 6.788556505404868, + "learning_rate": 5e-05, + "loss": 0.0597, + "num_input_tokens_seen": 474097816, + "step": 4889 + }, + { + "epoch": 0.47800156433320296, + "loss": 0.05937068909406662, + "loss_ce": 0.004210169427096844, + "loss_iou": 0.375, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 474097816, + "step": 4889 + }, + { + "epoch": 0.47809933515838876, + "grad_norm": 9.849207854412713, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 474193424, + "step": 4890 + }, + { + "epoch": 0.47809933515838876, + "loss": 0.07559885829687119, + "loss_ce": 0.004706523381173611, + "loss_iou": 0.318359375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 474193424, + "step": 4890 + }, + { + "epoch": 0.4781971059835745, + "grad_norm": 26.834146096967814, + "learning_rate": 5e-05, + "loss": 0.0617, + "num_input_tokens_seen": 474290452, + "step": 4891 + }, + { + "epoch": 0.4781971059835745, + "loss": 0.05989513546228409, + "loss_ce": 0.004093741066753864, + "loss_iou": 0.283203125, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 474290452, + "step": 4891 + }, + { + "epoch": 0.47829487680876026, + "grad_norm": 22.394627455267955, + "learning_rate": 5e-05, + "loss": 0.1052, + "num_input_tokens_seen": 474387072, + "step": 4892 + }, + { + "epoch": 0.47829487680876026, + "loss": 0.08852124214172363, + "loss_ce": 0.006139038596302271, + "loss_iou": 0.3359375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 474387072, + "step": 4892 + }, + { + "epoch": 0.478392647633946, + "grad_norm": 3.5695037004832675, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 474484436, + "step": 4893 + }, + { + "epoch": 0.478392647633946, + "loss": 0.054820671677589417, + "loss_ce": 0.0009266294655390084, + "loss_iou": 0.322265625, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 474484436, + "step": 4893 + }, + { + "epoch": 0.4784904184591318, + "grad_norm": 10.482798788925031, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 474582068, + "step": 4894 + }, + { + "epoch": 0.4784904184591318, + "loss": 0.07733137905597687, + "loss_ce": 0.002670124638825655, + "loss_iou": 0.3359375, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 474582068, + "step": 4894 + }, + { + "epoch": 0.47858818928431757, + "grad_norm": 8.570950405938989, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 474679152, + "step": 4895 + }, + { + "epoch": 0.47858818928431757, + "loss": 0.07621246576309204, + "loss_ce": 0.0038705472834408283, + "loss_iou": 0.322265625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 474679152, + "step": 4895 + }, + { + "epoch": 0.4786859601095033, + "grad_norm": 13.8584137530803, + "learning_rate": 5e-05, + "loss": 0.0966, + "num_input_tokens_seen": 474775788, + "step": 4896 + }, + { + "epoch": 0.4786859601095033, + "loss": 0.10825890302658081, + "loss_ce": 0.004895868245512247, + "loss_iou": 0.25, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 474775788, + "step": 4896 + }, + { + "epoch": 0.47878373093468907, + "grad_norm": 17.670565057990665, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 474873164, + "step": 4897 + }, + { + "epoch": 0.47878373093468907, + "loss": 0.08472296595573425, + "loss_ce": 0.004980538040399551, + "loss_iou": 0.37109375, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 474873164, + "step": 4897 + }, + { + "epoch": 0.4788815017598749, + "grad_norm": 9.176585202646107, + "learning_rate": 5e-05, + "loss": 0.1043, + "num_input_tokens_seen": 474970808, + "step": 4898 + }, + { + "epoch": 0.4788815017598749, + "loss": 0.0996188297867775, + "loss_ce": 0.0013331552036106586, + "loss_iou": 0.33203125, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 474970808, + "step": 4898 + }, + { + "epoch": 0.4789792725850606, + "grad_norm": 9.025754276719201, + "learning_rate": 5e-05, + "loss": 0.1133, + "num_input_tokens_seen": 475068344, + "step": 4899 + }, + { + "epoch": 0.4789792725850606, + "loss": 0.09067784994840622, + "loss_ce": 0.004152882844209671, + "loss_iou": 0.306640625, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 475068344, + "step": 4899 + }, + { + "epoch": 0.4790770434102464, + "grad_norm": 3.1848273778719656, + "learning_rate": 5e-05, + "loss": 0.0896, + "num_input_tokens_seen": 475165616, + "step": 4900 + }, + { + "epoch": 0.4790770434102464, + "loss": 0.05011824518442154, + "loss_ce": 0.006722250487655401, + "loss_iou": 0.353515625, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 475165616, + "step": 4900 + }, + { + "epoch": 0.4791748142354321, + "grad_norm": 7.915298105662986, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 475262392, + "step": 4901 + }, + { + "epoch": 0.4791748142354321, + "loss": 0.07228831946849823, + "loss_ce": 0.005309872329235077, + "loss_iou": 0.275390625, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 475262392, + "step": 4901 + }, + { + "epoch": 0.47927258506061793, + "grad_norm": 4.410427071617436, + "learning_rate": 5e-05, + "loss": 0.1054, + "num_input_tokens_seen": 475359444, + "step": 4902 + }, + { + "epoch": 0.47927258506061793, + "loss": 0.12830287218093872, + "loss_ce": 0.004752452950924635, + "loss_iou": 0.337890625, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 475359444, + "step": 4902 + }, + { + "epoch": 0.4793703558858037, + "grad_norm": 5.683197956772757, + "learning_rate": 5e-05, + "loss": 0.0956, + "num_input_tokens_seen": 475456616, + "step": 4903 + }, + { + "epoch": 0.4793703558858037, + "loss": 0.08636192977428436, + "loss_ce": 0.009964993223547935, + "loss_iou": 0.25390625, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 475456616, + "step": 4903 + }, + { + "epoch": 0.47946812671098943, + "grad_norm": 6.578819555694828, + "learning_rate": 5e-05, + "loss": 0.062, + "num_input_tokens_seen": 475553184, + "step": 4904 + }, + { + "epoch": 0.47946812671098943, + "loss": 0.0568719282746315, + "loss_ce": 0.003557719988748431, + "loss_iou": 0.26953125, + "loss_num": 0.01068115234375, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 475553184, + "step": 4904 + }, + { + "epoch": 0.4795658975361752, + "grad_norm": 3.035134719525996, + "learning_rate": 5e-05, + "loss": 0.1074, + "num_input_tokens_seen": 475649560, + "step": 4905 + }, + { + "epoch": 0.4795658975361752, + "loss": 0.06760412454605103, + "loss_ce": 0.008697565644979477, + "loss_iou": 0.265625, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 475649560, + "step": 4905 + }, + { + "epoch": 0.479663668361361, + "grad_norm": 6.089100420967773, + "learning_rate": 5e-05, + "loss": 0.1027, + "num_input_tokens_seen": 475746684, + "step": 4906 + }, + { + "epoch": 0.479663668361361, + "loss": 0.10810907185077667, + "loss_ce": 0.0028844582848250866, + "loss_iou": 0.298828125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 475746684, + "step": 4906 + }, + { + "epoch": 0.47976143918654673, + "grad_norm": 13.96807380265843, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 475843948, + "step": 4907 + }, + { + "epoch": 0.47976143918654673, + "loss": 0.054020244628190994, + "loss_ce": 0.006618817336857319, + "loss_iou": 0.28125, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 475843948, + "step": 4907 + }, + { + "epoch": 0.4798592100117325, + "grad_norm": 8.321740528694315, + "learning_rate": 5e-05, + "loss": 0.0937, + "num_input_tokens_seen": 475941248, + "step": 4908 + }, + { + "epoch": 0.4798592100117325, + "loss": 0.11849167943000793, + "loss_ce": 0.003348848782479763, + "loss_iou": 0.314453125, + "loss_num": 0.02294921875, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 475941248, + "step": 4908 + }, + { + "epoch": 0.4799569808369183, + "grad_norm": 20.545556641776034, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 476039580, + "step": 4909 + }, + { + "epoch": 0.4799569808369183, + "loss": 0.049607232213020325, + "loss_ce": 0.0038308613002300262, + "loss_iou": 0.341796875, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 476039580, + "step": 4909 + }, + { + "epoch": 0.48005475166210404, + "grad_norm": 16.0185959614159, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 476137248, + "step": 4910 + }, + { + "epoch": 0.48005475166210404, + "loss": 0.08164741843938828, + "loss_ce": 0.005246661603450775, + "loss_iou": 0.33203125, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 476137248, + "step": 4910 + }, + { + "epoch": 0.4801525224872898, + "grad_norm": 14.16934303225207, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 476234776, + "step": 4911 + }, + { + "epoch": 0.4801525224872898, + "loss": 0.08341362327337265, + "loss_ce": 0.003213427495211363, + "loss_iou": 0.349609375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 476234776, + "step": 4911 + }, + { + "epoch": 0.48025029331247554, + "grad_norm": 4.5643298013034475, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 476332152, + "step": 4912 + }, + { + "epoch": 0.48025029331247554, + "loss": 0.07218759506940842, + "loss_ce": 0.0058271209709346294, + "loss_iou": 0.48046875, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 476332152, + "step": 4912 + }, + { + "epoch": 0.48034806413766135, + "grad_norm": 4.898568329760414, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 476430484, + "step": 4913 + }, + { + "epoch": 0.48034806413766135, + "loss": 0.09422547370195389, + "loss_ce": 0.004473276436328888, + "loss_iou": 0.41015625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 476430484, + "step": 4913 + }, + { + "epoch": 0.4804458349628471, + "grad_norm": 9.813927699235228, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 476528208, + "step": 4914 + }, + { + "epoch": 0.4804458349628471, + "loss": 0.07192733883857727, + "loss_ce": 0.005490569397807121, + "loss_iou": 0.421875, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 476528208, + "step": 4914 + }, + { + "epoch": 0.48054360578803285, + "grad_norm": 7.415242723198554, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 476625024, + "step": 4915 + }, + { + "epoch": 0.48054360578803285, + "loss": 0.08950607478618622, + "loss_ce": 0.005872651003301144, + "loss_iou": 0.3203125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 476625024, + "step": 4915 + }, + { + "epoch": 0.4806413766132186, + "grad_norm": 8.292065287272887, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 476721904, + "step": 4916 + }, + { + "epoch": 0.4806413766132186, + "loss": 0.07483388483524323, + "loss_ce": 0.0054979482665658, + "loss_iou": 0.369140625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 476721904, + "step": 4916 + }, + { + "epoch": 0.4807391474384044, + "grad_norm": 15.858703520874078, + "learning_rate": 5e-05, + "loss": 0.1044, + "num_input_tokens_seen": 476818544, + "step": 4917 + }, + { + "epoch": 0.4807391474384044, + "loss": 0.09919942170381546, + "loss_ce": 0.006387838162481785, + "loss_iou": 0.294921875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 476818544, + "step": 4917 + }, + { + "epoch": 0.48083691826359015, + "grad_norm": 15.724949072372516, + "learning_rate": 5e-05, + "loss": 0.111, + "num_input_tokens_seen": 476916656, + "step": 4918 + }, + { + "epoch": 0.48083691826359015, + "loss": 0.07759606838226318, + "loss_ce": 0.005612728651612997, + "loss_iou": 0.421875, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 476916656, + "step": 4918 + }, + { + "epoch": 0.4809346890887759, + "grad_norm": 18.425260087507173, + "learning_rate": 5e-05, + "loss": 0.0999, + "num_input_tokens_seen": 477013516, + "step": 4919 + }, + { + "epoch": 0.4809346890887759, + "loss": 0.08689205348491669, + "loss_ce": 0.006600303575396538, + "loss_iou": 0.328125, + "loss_num": 0.01611328125, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 477013516, + "step": 4919 + }, + { + "epoch": 0.48103245991396165, + "grad_norm": 11.61725935521942, + "learning_rate": 5e-05, + "loss": 0.0604, + "num_input_tokens_seen": 477110084, + "step": 4920 + }, + { + "epoch": 0.48103245991396165, + "loss": 0.06393823772668839, + "loss_ce": 0.00624475535005331, + "loss_iou": 0.349609375, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 477110084, + "step": 4920 + }, + { + "epoch": 0.48113023073914746, + "grad_norm": 3.5377901421403233, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 477207108, + "step": 4921 + }, + { + "epoch": 0.48113023073914746, + "loss": 0.05434533953666687, + "loss_ce": 0.0054084984585642815, + "loss_iou": 0.26171875, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 477207108, + "step": 4921 + }, + { + "epoch": 0.4812280015643332, + "grad_norm": 10.703719809008275, + "learning_rate": 5e-05, + "loss": 0.0754, + "num_input_tokens_seen": 477303944, + "step": 4922 + }, + { + "epoch": 0.4812280015643332, + "loss": 0.06214163824915886, + "loss_ce": 0.0029985718429088593, + "loss_iou": 0.294921875, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 477303944, + "step": 4922 + }, + { + "epoch": 0.48132577238951896, + "grad_norm": 4.487328326062419, + "learning_rate": 5e-05, + "loss": 0.0851, + "num_input_tokens_seen": 477401100, + "step": 4923 + }, + { + "epoch": 0.48132577238951896, + "loss": 0.09011482447385788, + "loss_ce": 0.011478654108941555, + "loss_iou": 0.345703125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 477401100, + "step": 4923 + }, + { + "epoch": 0.4814235432147047, + "grad_norm": 9.917183501632897, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 477497148, + "step": 4924 + }, + { + "epoch": 0.4814235432147047, + "loss": 0.08368557691574097, + "loss_ce": 0.005072289612144232, + "loss_iou": 0.287109375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 477497148, + "step": 4924 + }, + { + "epoch": 0.4815213140398905, + "grad_norm": 25.70707888309502, + "learning_rate": 5e-05, + "loss": 0.0899, + "num_input_tokens_seen": 477592996, + "step": 4925 + }, + { + "epoch": 0.4815213140398905, + "loss": 0.07022921741008759, + "loss_ce": 0.007256195414811373, + "loss_iou": 0.2255859375, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 477592996, + "step": 4925 + }, + { + "epoch": 0.48161908486507626, + "grad_norm": 9.05403551422111, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 477689648, + "step": 4926 + }, + { + "epoch": 0.48161908486507626, + "loss": 0.08833631873130798, + "loss_ce": 0.009360644966363907, + "loss_iou": 0.2890625, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 477689648, + "step": 4926 + }, + { + "epoch": 0.481716855690262, + "grad_norm": 3.321276024197622, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 477786704, + "step": 4927 + }, + { + "epoch": 0.481716855690262, + "loss": 0.09811553359031677, + "loss_ce": 0.00528106139972806, + "loss_iou": 0.291015625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 477786704, + "step": 4927 + }, + { + "epoch": 0.48181462651544776, + "grad_norm": 47.46380021322206, + "learning_rate": 5e-05, + "loss": 0.1063, + "num_input_tokens_seen": 477883504, + "step": 4928 + }, + { + "epoch": 0.48181462651544776, + "loss": 0.11149773001670837, + "loss_ce": 0.012498700991272926, + "loss_iou": 0.361328125, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 477883504, + "step": 4928 + }, + { + "epoch": 0.48191239734063357, + "grad_norm": 5.973310657625129, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 477980416, + "step": 4929 + }, + { + "epoch": 0.48191239734063357, + "loss": 0.09164941310882568, + "loss_ce": 0.003362061455845833, + "loss_iou": 0.427734375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 477980416, + "step": 4929 + }, + { + "epoch": 0.4820101681658193, + "grad_norm": 12.850584804910403, + "learning_rate": 5e-05, + "loss": 0.0908, + "num_input_tokens_seen": 478077116, + "step": 4930 + }, + { + "epoch": 0.4820101681658193, + "loss": 0.11585952341556549, + "loss_ce": 0.011886140331625938, + "loss_iou": 0.2294921875, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 478077116, + "step": 4930 + }, + { + "epoch": 0.48210793899100507, + "grad_norm": 8.089108880149276, + "learning_rate": 5e-05, + "loss": 0.0965, + "num_input_tokens_seen": 478173988, + "step": 4931 + }, + { + "epoch": 0.48210793899100507, + "loss": 0.07845714688301086, + "loss_ce": 0.005802421364933252, + "loss_iou": 0.318359375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 478173988, + "step": 4931 + }, + { + "epoch": 0.4822057098161909, + "grad_norm": 6.774580295273411, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 478269956, + "step": 4932 + }, + { + "epoch": 0.4822057098161909, + "loss": 0.06275978684425354, + "loss_ce": 0.006348049268126488, + "loss_iou": 0.236328125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 478269956, + "step": 4932 + }, + { + "epoch": 0.4823034806413766, + "grad_norm": 8.216338676165844, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 478368224, + "step": 4933 + }, + { + "epoch": 0.4823034806413766, + "loss": 0.06161706522107124, + "loss_ce": 0.008546997793018818, + "loss_iou": 0.28515625, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 478368224, + "step": 4933 + }, + { + "epoch": 0.4824012514665624, + "grad_norm": 3.4163278968308326, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 478464756, + "step": 4934 + }, + { + "epoch": 0.4824012514665624, + "loss": 0.07479559630155563, + "loss_ce": 0.005047667771577835, + "loss_iou": 0.345703125, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 478464756, + "step": 4934 + }, + { + "epoch": 0.4824990222917481, + "grad_norm": 6.106104615528919, + "learning_rate": 5e-05, + "loss": 0.0817, + "num_input_tokens_seen": 478561316, + "step": 4935 + }, + { + "epoch": 0.4824990222917481, + "loss": 0.10457643121480942, + "loss_ce": 0.004906023852527142, + "loss_iou": 0.33984375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 478561316, + "step": 4935 + }, + { + "epoch": 0.48259679311693393, + "grad_norm": 23.102001173152253, + "learning_rate": 5e-05, + "loss": 0.0542, + "num_input_tokens_seen": 478658036, + "step": 4936 + }, + { + "epoch": 0.48259679311693393, + "loss": 0.06040000915527344, + "loss_ce": 0.00397300673648715, + "loss_iou": 0.2451171875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 478658036, + "step": 4936 + }, + { + "epoch": 0.4826945639421197, + "grad_norm": 37.12987675207433, + "learning_rate": 5e-05, + "loss": 0.0905, + "num_input_tokens_seen": 478754676, + "step": 4937 + }, + { + "epoch": 0.4826945639421197, + "loss": 0.06899019330739975, + "loss_ce": 0.006658040452748537, + "loss_iou": 0.322265625, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 478754676, + "step": 4937 + }, + { + "epoch": 0.48279233476730543, + "grad_norm": 12.914165250506917, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 478851572, + "step": 4938 + }, + { + "epoch": 0.48279233476730543, + "loss": 0.08505193889141083, + "loss_ce": 0.0033106086775660515, + "loss_iou": 0.412109375, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 478851572, + "step": 4938 + }, + { + "epoch": 0.4828901055924912, + "grad_norm": 4.929663409925358, + "learning_rate": 5e-05, + "loss": 0.0919, + "num_input_tokens_seen": 478948460, + "step": 4939 + }, + { + "epoch": 0.4828901055924912, + "loss": 0.08392848074436188, + "loss_ce": 0.007436164654791355, + "loss_iou": 0.333984375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 478948460, + "step": 4939 + }, + { + "epoch": 0.482987876417677, + "grad_norm": 6.6219984373944305, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 479045276, + "step": 4940 + }, + { + "epoch": 0.482987876417677, + "loss": 0.08567618578672409, + "loss_ce": 0.0021343191619962454, + "loss_iou": 0.34375, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 479045276, + "step": 4940 + }, + { + "epoch": 0.48308564724286274, + "grad_norm": 4.517184460165123, + "learning_rate": 5e-05, + "loss": 0.1055, + "num_input_tokens_seen": 479143432, + "step": 4941 + }, + { + "epoch": 0.48308564724286274, + "loss": 0.08888521045446396, + "loss_ce": 0.00492372689768672, + "loss_iou": 0.294921875, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 479143432, + "step": 4941 + }, + { + "epoch": 0.4831834180680485, + "grad_norm": 8.504821239626754, + "learning_rate": 5e-05, + "loss": 0.1002, + "num_input_tokens_seen": 479239852, + "step": 4942 + }, + { + "epoch": 0.4831834180680485, + "loss": 0.08690546452999115, + "loss_ce": 0.0028447918593883514, + "loss_iou": 0.28515625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 479239852, + "step": 4942 + }, + { + "epoch": 0.48328118889323424, + "grad_norm": 21.55540040648106, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 479337004, + "step": 4943 + }, + { + "epoch": 0.48328118889323424, + "loss": 0.08909322321414948, + "loss_ce": 0.00581074645742774, + "loss_iou": 0.4453125, + "loss_num": 0.0166015625, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 479337004, + "step": 4943 + }, + { + "epoch": 0.48337895971842004, + "grad_norm": 4.380204733869524, + "learning_rate": 5e-05, + "loss": 0.0497, + "num_input_tokens_seen": 479434076, + "step": 4944 + }, + { + "epoch": 0.48337895971842004, + "loss": 0.05902174487709999, + "loss_ce": 0.007080825045704842, + "loss_iou": 0.39453125, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 479434076, + "step": 4944 + }, + { + "epoch": 0.4834767305436058, + "grad_norm": 7.314605904757062, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 479530432, + "step": 4945 + }, + { + "epoch": 0.4834767305436058, + "loss": 0.09665748476982117, + "loss_ce": 0.006394119001924992, + "loss_iou": 0.283203125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 479530432, + "step": 4945 + }, + { + "epoch": 0.48357450136879154, + "grad_norm": 16.0258937197233, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 479627988, + "step": 4946 + }, + { + "epoch": 0.48357450136879154, + "loss": 0.06007097661495209, + "loss_ce": 0.005146961659193039, + "loss_iou": 0.31640625, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 479627988, + "step": 4946 + }, + { + "epoch": 0.4836722721939773, + "grad_norm": 13.850278820095973, + "learning_rate": 5e-05, + "loss": 0.12, + "num_input_tokens_seen": 479725184, + "step": 4947 + }, + { + "epoch": 0.4836722721939773, + "loss": 0.15555202960968018, + "loss_ce": 0.010440954007208347, + "loss_iou": 0.228515625, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 479725184, + "step": 4947 + }, + { + "epoch": 0.4837700430191631, + "grad_norm": 7.825380160529671, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 479821836, + "step": 4948 + }, + { + "epoch": 0.4837700430191631, + "loss": 0.07167968153953552, + "loss_ce": 0.0014892533654347062, + "loss_iou": 0.318359375, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 479821836, + "step": 4948 + }, + { + "epoch": 0.48386781384434885, + "grad_norm": 6.746739890597927, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 479918976, + "step": 4949 + }, + { + "epoch": 0.48386781384434885, + "loss": 0.08691823482513428, + "loss_ce": 0.0034602887462824583, + "loss_iou": 0.3359375, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 479918976, + "step": 4949 + }, + { + "epoch": 0.4839655846695346, + "grad_norm": 6.318651662433785, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 480016396, + "step": 4950 + }, + { + "epoch": 0.4839655846695346, + "loss": 0.09733663499355316, + "loss_ce": 0.006012781523168087, + "loss_iou": 0.26171875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 480016396, + "step": 4950 + }, + { + "epoch": 0.48406335549472035, + "grad_norm": 5.590685385530841, + "learning_rate": 5e-05, + "loss": 0.0745, + "num_input_tokens_seen": 480112940, + "step": 4951 + }, + { + "epoch": 0.48406335549472035, + "loss": 0.09070873260498047, + "loss_ce": 0.0031843262258917093, + "loss_iou": 0.2236328125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 480112940, + "step": 4951 + }, + { + "epoch": 0.48416112631990615, + "grad_norm": 5.085194985186993, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 480210684, + "step": 4952 + }, + { + "epoch": 0.48416112631990615, + "loss": 0.08246935904026031, + "loss_ce": 0.002299678046256304, + "loss_iou": 0.404296875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 480210684, + "step": 4952 + }, + { + "epoch": 0.4842588971450919, + "grad_norm": 4.466510905500791, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 480308276, + "step": 4953 + }, + { + "epoch": 0.4842588971450919, + "loss": 0.09122101962566376, + "loss_ce": 0.004894424229860306, + "loss_iou": 0.3828125, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 480308276, + "step": 4953 + }, + { + "epoch": 0.48435666797027765, + "grad_norm": 13.240969395577753, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 480405264, + "step": 4954 + }, + { + "epoch": 0.48435666797027765, + "loss": 0.08390641212463379, + "loss_ce": 0.002611396601423621, + "loss_iou": 0.27734375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 480405264, + "step": 4954 + }, + { + "epoch": 0.48445443879546346, + "grad_norm": 16.40933053798198, + "learning_rate": 5e-05, + "loss": 0.0701, + "num_input_tokens_seen": 480502376, + "step": 4955 + }, + { + "epoch": 0.48445443879546346, + "loss": 0.05924138426780701, + "loss_ce": 0.0033331799786537886, + "loss_iou": 0.271484375, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 480502376, + "step": 4955 + }, + { + "epoch": 0.4845522096206492, + "grad_norm": 5.409654873114165, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 480598948, + "step": 4956 + }, + { + "epoch": 0.4845522096206492, + "loss": 0.07047349214553833, + "loss_ce": 0.007241062819957733, + "loss_iou": 0.2333984375, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 480598948, + "step": 4956 + }, + { + "epoch": 0.48464998044583496, + "grad_norm": 5.415724031034928, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 480696192, + "step": 4957 + }, + { + "epoch": 0.48464998044583496, + "loss": 0.08446487784385681, + "loss_ce": 0.0018690491560846567, + "loss_iou": 0.267578125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 480696192, + "step": 4957 + }, + { + "epoch": 0.4847477512710207, + "grad_norm": 3.6502288034447763, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 480793608, + "step": 4958 + }, + { + "epoch": 0.4847477512710207, + "loss": 0.07221024483442307, + "loss_ce": 0.0025386144407093525, + "loss_iou": 0.3515625, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 480793608, + "step": 4958 + }, + { + "epoch": 0.4848455220962065, + "grad_norm": 6.840610727243731, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 480890344, + "step": 4959 + }, + { + "epoch": 0.4848455220962065, + "loss": 0.07535384595394135, + "loss_ce": 0.005041344091296196, + "loss_iou": 0.193359375, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 480890344, + "step": 4959 + }, + { + "epoch": 0.48494329292139227, + "grad_norm": 6.808739938526809, + "learning_rate": 5e-05, + "loss": 0.1064, + "num_input_tokens_seen": 480986232, + "step": 4960 + }, + { + "epoch": 0.48494329292139227, + "loss": 0.11063364148139954, + "loss_ce": 0.01130274124443531, + "loss_iou": 0.177734375, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 480986232, + "step": 4960 + }, + { + "epoch": 0.485041063746578, + "grad_norm": 4.395106051285406, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 481083276, + "step": 4961 + }, + { + "epoch": 0.485041063746578, + "loss": 0.08112137019634247, + "loss_ce": 0.003019255120307207, + "loss_iou": 0.291015625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 481083276, + "step": 4961 + }, + { + "epoch": 0.48513883457176377, + "grad_norm": 7.154483993721621, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 481180128, + "step": 4962 + }, + { + "epoch": 0.48513883457176377, + "loss": 0.08734873682260513, + "loss_ce": 0.008796490728855133, + "loss_iou": 0.310546875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 481180128, + "step": 4962 + }, + { + "epoch": 0.48523660539694957, + "grad_norm": 2.518835353671387, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 481276928, + "step": 4963 + }, + { + "epoch": 0.48523660539694957, + "loss": 0.077082060277462, + "loss_ce": 0.007891080342233181, + "loss_iou": 0.2373046875, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 481276928, + "step": 4963 + }, + { + "epoch": 0.4853343762221353, + "grad_norm": 8.143221079142693, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 481373504, + "step": 4964 + }, + { + "epoch": 0.4853343762221353, + "loss": 0.08967834711074829, + "loss_ce": 0.003771366085857153, + "loss_iou": 0.23046875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 481373504, + "step": 4964 + }, + { + "epoch": 0.48543214704732107, + "grad_norm": 5.566453441272931, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 481471344, + "step": 4965 + }, + { + "epoch": 0.48543214704732107, + "loss": 0.07714052498340607, + "loss_ce": 0.006004047580063343, + "loss_iou": 0.353515625, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 481471344, + "step": 4965 + }, + { + "epoch": 0.4855299178725068, + "grad_norm": 11.270987011668305, + "learning_rate": 5e-05, + "loss": 0.1154, + "num_input_tokens_seen": 481566916, + "step": 4966 + }, + { + "epoch": 0.4855299178725068, + "loss": 0.0737234503030777, + "loss_ce": 0.007591859437525272, + "loss_iou": 0.306640625, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 481566916, + "step": 4966 + }, + { + "epoch": 0.4856276886976926, + "grad_norm": 5.446774804123279, + "learning_rate": 5e-05, + "loss": 0.0758, + "num_input_tokens_seen": 481664704, + "step": 4967 + }, + { + "epoch": 0.4856276886976926, + "loss": 0.08137772977352142, + "loss_ce": 0.004229296930134296, + "loss_iou": 0.369140625, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 481664704, + "step": 4967 + }, + { + "epoch": 0.4857254595228784, + "grad_norm": 9.495257334688304, + "learning_rate": 5e-05, + "loss": 0.0952, + "num_input_tokens_seen": 481761624, + "step": 4968 + }, + { + "epoch": 0.4857254595228784, + "loss": 0.07719630002975464, + "loss_ce": 0.003069101134315133, + "loss_iou": 0.2392578125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 481761624, + "step": 4968 + }, + { + "epoch": 0.48582323034806413, + "grad_norm": 12.23505674709528, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 481859432, + "step": 4969 + }, + { + "epoch": 0.48582323034806413, + "loss": 0.07995207607746124, + "loss_ce": 0.009960012510418892, + "loss_iou": 0.423828125, + "loss_num": 0.0140380859375, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 481859432, + "step": 4969 + }, + { + "epoch": 0.4859210011732499, + "grad_norm": 13.72962639091938, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 481956616, + "step": 4970 + }, + { + "epoch": 0.4859210011732499, + "loss": 0.08460500836372375, + "loss_ce": 0.00593069801107049, + "loss_iou": 0.34375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 481956616, + "step": 4970 + }, + { + "epoch": 0.4860187719984357, + "grad_norm": 2.8772699274167257, + "learning_rate": 5e-05, + "loss": 0.0622, + "num_input_tokens_seen": 482052504, + "step": 4971 + }, + { + "epoch": 0.4860187719984357, + "loss": 0.0763835459947586, + "loss_ce": 0.004178957547992468, + "loss_iou": 0.28125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 482052504, + "step": 4971 + }, + { + "epoch": 0.48611654282362143, + "grad_norm": 4.4946774439775155, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 482149936, + "step": 4972 + }, + { + "epoch": 0.48611654282362143, + "loss": 0.07665207982063293, + "loss_ce": 0.004172832239419222, + "loss_iou": 0.3046875, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 482149936, + "step": 4972 + }, + { + "epoch": 0.4862143136488072, + "grad_norm": 12.526941028031798, + "learning_rate": 5e-05, + "loss": 0.0925, + "num_input_tokens_seen": 482247844, + "step": 4973 + }, + { + "epoch": 0.4862143136488072, + "loss": 0.07402462512254715, + "loss_ce": 0.007374230306595564, + "loss_iou": 0.359375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 482247844, + "step": 4973 + }, + { + "epoch": 0.48631208447399293, + "grad_norm": 28.23223650127358, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 482344380, + "step": 4974 + }, + { + "epoch": 0.48631208447399293, + "loss": 0.08368609100580215, + "loss_ce": 0.006186701823025942, + "loss_iou": 0.291015625, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 482344380, + "step": 4974 + }, + { + "epoch": 0.48640985529917874, + "grad_norm": 16.058623554660368, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 482440888, + "step": 4975 + }, + { + "epoch": 0.48640985529917874, + "loss": 0.0660916417837143, + "loss_ce": 0.00481234397739172, + "loss_iou": 0.294921875, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 482440888, + "step": 4975 + }, + { + "epoch": 0.4865076261243645, + "grad_norm": 6.036785173245089, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 482538684, + "step": 4976 + }, + { + "epoch": 0.4865076261243645, + "loss": 0.08923934400081635, + "loss_ce": 0.003210294060409069, + "loss_iou": 0.328125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 482538684, + "step": 4976 + }, + { + "epoch": 0.48660539694955024, + "grad_norm": 5.57033300210812, + "learning_rate": 5e-05, + "loss": 0.105, + "num_input_tokens_seen": 482635688, + "step": 4977 + }, + { + "epoch": 0.48660539694955024, + "loss": 0.13259641826152802, + "loss_ce": 0.008267802186310291, + "loss_iou": 0.37890625, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 482635688, + "step": 4977 + }, + { + "epoch": 0.48670316777473605, + "grad_norm": 7.645741356885795, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 482733480, + "step": 4978 + }, + { + "epoch": 0.48670316777473605, + "loss": 0.08839651942253113, + "loss_ce": 0.011064969003200531, + "loss_iou": 0.41015625, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 482733480, + "step": 4978 + }, + { + "epoch": 0.4868009385999218, + "grad_norm": 4.970996450187142, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 482830296, + "step": 4979 + }, + { + "epoch": 0.4868009385999218, + "loss": 0.09025458991527557, + "loss_ce": 0.00648383516818285, + "loss_iou": 0.365234375, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 482830296, + "step": 4979 + }, + { + "epoch": 0.48689870942510755, + "grad_norm": 9.952435194537326, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 482926928, + "step": 4980 + }, + { + "epoch": 0.48689870942510755, + "loss": 0.07545029371976852, + "loss_ce": 0.003226633882150054, + "loss_iou": 0.27734375, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 482926928, + "step": 4980 + }, + { + "epoch": 0.4869964802502933, + "grad_norm": 8.285609602254377, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 483023656, + "step": 4981 + }, + { + "epoch": 0.4869964802502933, + "loss": 0.09470412880182266, + "loss_ce": 0.005196073092520237, + "loss_iou": 0.443359375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 483023656, + "step": 4981 + }, + { + "epoch": 0.4870942510754791, + "grad_norm": 7.2957954417333415, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 483119488, + "step": 4982 + }, + { + "epoch": 0.4870942510754791, + "loss": 0.08865048736333847, + "loss_ce": 0.012766622006893158, + "loss_iou": 0.34765625, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 483119488, + "step": 4982 + }, + { + "epoch": 0.48719202190066485, + "grad_norm": 7.039568370063708, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 483217148, + "step": 4983 + }, + { + "epoch": 0.48719202190066485, + "loss": 0.06726819276809692, + "loss_ce": 0.004463016055524349, + "loss_iou": 0.376953125, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 483217148, + "step": 4983 + }, + { + "epoch": 0.4872897927258506, + "grad_norm": 3.915050766195567, + "learning_rate": 5e-05, + "loss": 0.0872, + "num_input_tokens_seen": 483313984, + "step": 4984 + }, + { + "epoch": 0.4872897927258506, + "loss": 0.07305176556110382, + "loss_ce": 0.006416638847440481, + "loss_iou": 0.3359375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 483313984, + "step": 4984 + }, + { + "epoch": 0.48738756355103635, + "grad_norm": 3.984944255770141, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 483410532, + "step": 4985 + }, + { + "epoch": 0.48738756355103635, + "loss": 0.09074319899082184, + "loss_ce": 0.005797517951577902, + "loss_iou": 0.34375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 483410532, + "step": 4985 + }, + { + "epoch": 0.48748533437622216, + "grad_norm": 4.328087117108046, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 483507504, + "step": 4986 + }, + { + "epoch": 0.48748533437622216, + "loss": 0.0811186134815216, + "loss_ce": 0.01051619928330183, + "loss_iou": 0.267578125, + "loss_num": 0.01409912109375, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 483507504, + "step": 4986 + }, + { + "epoch": 0.4875831052014079, + "grad_norm": 5.745236998648772, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 483604508, + "step": 4987 + }, + { + "epoch": 0.4875831052014079, + "loss": 0.06847754865884781, + "loss_ce": 0.0044364118948578835, + "loss_iou": 0.38671875, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 483604508, + "step": 4987 + }, + { + "epoch": 0.48768087602659366, + "grad_norm": 6.466872745702765, + "learning_rate": 5e-05, + "loss": 0.1308, + "num_input_tokens_seen": 483700592, + "step": 4988 + }, + { + "epoch": 0.48768087602659366, + "loss": 0.1604325771331787, + "loss_ce": 0.009736779145896435, + "loss_iou": 0.32421875, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 483700592, + "step": 4988 + }, + { + "epoch": 0.4877786468517794, + "grad_norm": 8.640951771059937, + "learning_rate": 5e-05, + "loss": 0.0519, + "num_input_tokens_seen": 483798212, + "step": 4989 + }, + { + "epoch": 0.4877786468517794, + "loss": 0.06205899640917778, + "loss_ce": 0.004312107805162668, + "loss_iou": 0.30078125, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 483798212, + "step": 4989 + }, + { + "epoch": 0.4878764176769652, + "grad_norm": 31.76568948939951, + "learning_rate": 5e-05, + "loss": 0.1436, + "num_input_tokens_seen": 483895240, + "step": 4990 + }, + { + "epoch": 0.4878764176769652, + "loss": 0.16506969928741455, + "loss_ce": 0.006118902005255222, + "loss_iou": 0.337890625, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 483895240, + "step": 4990 + }, + { + "epoch": 0.48797418850215096, + "grad_norm": 5.8808357045577075, + "learning_rate": 5e-05, + "loss": 0.1032, + "num_input_tokens_seen": 483992308, + "step": 4991 + }, + { + "epoch": 0.48797418850215096, + "loss": 0.11366786062717438, + "loss_ce": 0.004323382396250963, + "loss_iou": 0.423828125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 483992308, + "step": 4991 + }, + { + "epoch": 0.4880719593273367, + "grad_norm": 3.5827622190352506, + "learning_rate": 5e-05, + "loss": 0.1165, + "num_input_tokens_seen": 484089720, + "step": 4992 + }, + { + "epoch": 0.4880719593273367, + "loss": 0.15468043088912964, + "loss_ce": 0.004930666647851467, + "loss_iou": 0.28125, + "loss_num": 0.030029296875, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 484089720, + "step": 4992 + }, + { + "epoch": 0.48816973015252246, + "grad_norm": 13.453009491416909, + "learning_rate": 5e-05, + "loss": 0.0972, + "num_input_tokens_seen": 484186940, + "step": 4993 + }, + { + "epoch": 0.48816973015252246, + "loss": 0.12783345580101013, + "loss_ce": 0.015391441062092781, + "loss_iou": 0.2734375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 484186940, + "step": 4993 + }, + { + "epoch": 0.48826750097770827, + "grad_norm": 7.4410499763969975, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 484283316, + "step": 4994 + }, + { + "epoch": 0.48826750097770827, + "loss": 0.1019275039434433, + "loss_ce": 0.009123541414737701, + "loss_iou": 0.3828125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 484283316, + "step": 4994 + }, + { + "epoch": 0.488365271802894, + "grad_norm": 4.5040132818483665, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 484379724, + "step": 4995 + }, + { + "epoch": 0.488365271802894, + "loss": 0.09679470211267471, + "loss_ce": 0.0058370595797896385, + "loss_iou": 0.37890625, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 484379724, + "step": 4995 + }, + { + "epoch": 0.48846304262807977, + "grad_norm": 2.5854916338224383, + "learning_rate": 5e-05, + "loss": 0.0584, + "num_input_tokens_seen": 484476152, + "step": 4996 + }, + { + "epoch": 0.48846304262807977, + "loss": 0.055164363235235214, + "loss_ce": 0.004825619049370289, + "loss_iou": 0.328125, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 484476152, + "step": 4996 + }, + { + "epoch": 0.4885608134532655, + "grad_norm": 15.383360780736128, + "learning_rate": 5e-05, + "loss": 0.1006, + "num_input_tokens_seen": 484573424, + "step": 4997 + }, + { + "epoch": 0.4885608134532655, + "loss": 0.12675225734710693, + "loss_ce": 0.003644339507445693, + "loss_iou": 0.353515625, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 484573424, + "step": 4997 + }, + { + "epoch": 0.4886585842784513, + "grad_norm": 10.146920389196609, + "learning_rate": 5e-05, + "loss": 0.0922, + "num_input_tokens_seen": 484669664, + "step": 4998 + }, + { + "epoch": 0.4886585842784513, + "loss": 0.09690211713314056, + "loss_ce": 0.007561907172203064, + "loss_iou": 0.2734375, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 484669664, + "step": 4998 + }, + { + "epoch": 0.4887563551036371, + "grad_norm": 3.7206702897073547, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 484766628, + "step": 4999 + }, + { + "epoch": 0.4887563551036371, + "loss": 0.07273252308368683, + "loss_ce": 0.009805277921259403, + "loss_iou": 0.314453125, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 484766628, + "step": 4999 + }, + { + "epoch": 0.4888541259288228, + "grad_norm": 8.626738853397839, + "learning_rate": 5e-05, + "loss": 0.06, + "num_input_tokens_seen": 484863484, + "step": 5000 + }, + { + "epoch": 0.4888541259288228, + "eval_seeclick_CIoU": 0.5299190282821655, + "eval_seeclick_GIoU": 0.5339105576276779, + "eval_seeclick_IoU": 0.5675827115774155, + "eval_seeclick_MAE_all": 0.06433393992483616, + "eval_seeclick_MAE_h": 0.03221435472369194, + "eval_seeclick_MAE_w": 0.09282310679554939, + "eval_seeclick_MAE_x": 0.1003081277012825, + "eval_seeclick_MAE_y": 0.03199019469320774, + "eval_seeclick_NUM_probability": 0.9999939501285553, + "eval_seeclick_inside_bbox": 0.8764204680919647, + "eval_seeclick_loss": 0.24385325610637665, + "eval_seeclick_loss_ce": 0.010358165483921766, + "eval_seeclick_loss_iou": 0.49853515625, + "eval_seeclick_loss_num": 0.046199798583984375, + "eval_seeclick_loss_xval": 0.231170654296875, + "eval_seeclick_runtime": 77.6914, + "eval_seeclick_samples_per_second": 0.553, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 484863484, + "step": 5000 + }, + { + "epoch": 0.4888541259288228, + "eval_icons_CIoU": 0.7152328789234161, + "eval_icons_GIoU": 0.7066245079040527, + "eval_icons_IoU": 0.7371565103530884, + "eval_icons_MAE_all": 0.04898114129900932, + "eval_icons_MAE_h": 0.05643319524824619, + "eval_icons_MAE_w": 0.04315437562763691, + "eval_icons_MAE_x": 0.044442443177103996, + "eval_icons_MAE_y": 0.05189455673098564, + "eval_icons_NUM_probability": 0.999988317489624, + "eval_icons_inside_bbox": 0.8940972089767456, + "eval_icons_loss": 0.15385320782661438, + "eval_icons_loss_ce": 1.408966909366427e-05, + "eval_icons_loss_iou": 0.45147705078125, + "eval_icons_loss_num": 0.033954620361328125, + "eval_icons_loss_xval": 0.169891357421875, + "eval_icons_runtime": 86.0331, + "eval_icons_samples_per_second": 0.581, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 484863484, + "step": 5000 + }, + { + "epoch": 0.4888541259288228, + "eval_screenspot_CIoU": 0.2904223750034968, + "eval_screenspot_GIoU": 0.2668120463689168, + "eval_screenspot_IoU": 0.3879595398902893, + "eval_screenspot_MAE_all": 0.16495482126871744, + "eval_screenspot_MAE_h": 0.1275845393538475, + "eval_screenspot_MAE_w": 0.2193761244416237, + "eval_screenspot_MAE_x": 0.19291093945503235, + "eval_screenspot_MAE_y": 0.11994765202204387, + "eval_screenspot_NUM_probability": 0.9999599854151408, + "eval_screenspot_inside_bbox": 0.6745833357175192, + "eval_screenspot_loss": 0.5799081921577454, + "eval_screenspot_loss_ce": 0.017591147994001705, + "eval_screenspot_loss_iou": 0.3846028645833333, + "eval_screenspot_loss_num": 0.11346435546875, + "eval_screenspot_loss_xval": 0.567626953125, + "eval_screenspot_runtime": 146.7984, + "eval_screenspot_samples_per_second": 0.606, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 484863484, + "step": 5000 + }, + { + "epoch": 0.4888541259288228, + "eval_compot_CIoU": 0.46402837336063385, + "eval_compot_GIoU": 0.4606800079345703, + "eval_compot_IoU": 0.5243882238864899, + "eval_compot_MAE_all": 0.09443158283829689, + "eval_compot_MAE_h": 0.06597782298922539, + "eval_compot_MAE_w": 0.12495886161923409, + "eval_compot_MAE_x": 0.11769023537635803, + "eval_compot_MAE_y": 0.06909942254424095, + "eval_compot_NUM_probability": 0.9999586641788483, + "eval_compot_inside_bbox": 0.7083333432674408, + "eval_compot_loss": 0.2886422276496887, + "eval_compot_loss_ce": 0.02043922245502472, + "eval_compot_loss_iou": 0.45208740234375, + "eval_compot_loss_num": 0.047210693359375, + "eval_compot_loss_xval": 0.235992431640625, + "eval_compot_runtime": 87.3917, + "eval_compot_samples_per_second": 0.572, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 484863484, + "step": 5000 + }, + { + "epoch": 0.4888541259288228, + "loss": 0.2586168646812439, + "loss_ce": 0.01978631690144539, + "loss_iou": 0.46875, + "loss_num": 0.0478515625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 484863484, + "step": 5000 + }, + { + "epoch": 0.48895189675400863, + "grad_norm": 4.587549915950705, + "learning_rate": 5e-05, + "loss": 0.0558, + "num_input_tokens_seen": 484960420, + "step": 5001 + }, + { + "epoch": 0.48895189675400863, + "loss": 0.0507335439324379, + "loss_ce": 0.0038051370065659285, + "loss_iou": 0.37109375, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 484960420, + "step": 5001 + }, + { + "epoch": 0.4890496675791944, + "grad_norm": 6.723650749187011, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 485057496, + "step": 5002 + }, + { + "epoch": 0.4890496675791944, + "loss": 0.11376820504665375, + "loss_ce": 0.007475484162569046, + "loss_iou": 0.283203125, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 485057496, + "step": 5002 + }, + { + "epoch": 0.48914743840438013, + "grad_norm": 6.123764375704041, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 485155220, + "step": 5003 + }, + { + "epoch": 0.48914743840438013, + "loss": 0.09284105151891708, + "loss_ce": 0.003195666940882802, + "loss_iou": 0.314453125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 485155220, + "step": 5003 + }, + { + "epoch": 0.4892452092295659, + "grad_norm": 8.610262900534297, + "learning_rate": 5e-05, + "loss": 0.0773, + "num_input_tokens_seen": 485252236, + "step": 5004 + }, + { + "epoch": 0.4892452092295659, + "loss": 0.05765601992607117, + "loss_ce": 0.000679703545756638, + "loss_iou": 0.37890625, + "loss_num": 0.01141357421875, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 485252236, + "step": 5004 + }, + { + "epoch": 0.4893429800547517, + "grad_norm": 12.72265993203407, + "learning_rate": 5e-05, + "loss": 0.0975, + "num_input_tokens_seen": 485349228, + "step": 5005 + }, + { + "epoch": 0.4893429800547517, + "loss": 0.1265040785074234, + "loss_ce": 0.00547136552631855, + "loss_iou": 0.22265625, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 485349228, + "step": 5005 + }, + { + "epoch": 0.48944075087993744, + "grad_norm": 3.5707239426150807, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 485446952, + "step": 5006 + }, + { + "epoch": 0.48944075087993744, + "loss": 0.11176939308643341, + "loss_ce": 0.0017230052035301924, + "loss_iou": 0.349609375, + "loss_num": 0.0220947265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 485446952, + "step": 5006 + }, + { + "epoch": 0.4895385217051232, + "grad_norm": 3.9376880333412783, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 485544100, + "step": 5007 + }, + { + "epoch": 0.4895385217051232, + "loss": 0.07624925673007965, + "loss_ce": 0.004365099593997002, + "loss_iou": 0.38671875, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 485544100, + "step": 5007 + }, + { + "epoch": 0.48963629253030894, + "grad_norm": 48.232089764341694, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 485640452, + "step": 5008 + }, + { + "epoch": 0.48963629253030894, + "loss": 0.07205024361610413, + "loss_ce": 0.0024701657239347696, + "loss_iou": 0.26953125, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 485640452, + "step": 5008 + }, + { + "epoch": 0.48973406335549474, + "grad_norm": 2.137906648708055, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 485738548, + "step": 5009 + }, + { + "epoch": 0.48973406335549474, + "loss": 0.11855530738830566, + "loss_ce": 0.006052253767848015, + "loss_iou": 0.306640625, + "loss_num": 0.0225830078125, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 485738548, + "step": 5009 + }, + { + "epoch": 0.4898318341806805, + "grad_norm": 6.2683342646752696, + "learning_rate": 5e-05, + "loss": 0.0929, + "num_input_tokens_seen": 485835932, + "step": 5010 + }, + { + "epoch": 0.4898318341806805, + "loss": 0.07659858465194702, + "loss_ce": 0.0039972602389752865, + "loss_iou": 0.2421875, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 485835932, + "step": 5010 + }, + { + "epoch": 0.48992960500586624, + "grad_norm": 30.556819478056518, + "learning_rate": 5e-05, + "loss": 0.1044, + "num_input_tokens_seen": 485932944, + "step": 5011 + }, + { + "epoch": 0.48992960500586624, + "loss": 0.08178223669528961, + "loss_ce": 0.004206553101539612, + "loss_iou": 0.228515625, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 485932944, + "step": 5011 + }, + { + "epoch": 0.490027375831052, + "grad_norm": 12.506176135310092, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 486029388, + "step": 5012 + }, + { + "epoch": 0.490027375831052, + "loss": 0.11997479200363159, + "loss_ce": 0.005503358319401741, + "loss_iou": 0.279296875, + "loss_num": 0.02294921875, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 486029388, + "step": 5012 + }, + { + "epoch": 0.4901251466562378, + "grad_norm": 12.092792927607432, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 486126284, + "step": 5013 + }, + { + "epoch": 0.4901251466562378, + "loss": 0.04717497155070305, + "loss_ce": 0.004869977943599224, + "loss_iou": 0.2734375, + "loss_num": 0.00848388671875, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 486126284, + "step": 5013 + }, + { + "epoch": 0.49022291748142355, + "grad_norm": 70.41880589939815, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 486222696, + "step": 5014 + }, + { + "epoch": 0.49022291748142355, + "loss": 0.06361277401447296, + "loss_ce": 0.0038288384675979614, + "loss_iou": 0.26171875, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 486222696, + "step": 5014 + }, + { + "epoch": 0.4903206883066093, + "grad_norm": 32.37575064598721, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 486318664, + "step": 5015 + }, + { + "epoch": 0.4903206883066093, + "loss": 0.06138147413730621, + "loss_ce": 0.006472723558545113, + "loss_iou": 0.3125, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 486318664, + "step": 5015 + }, + { + "epoch": 0.49041845913179505, + "grad_norm": 4.348907132672534, + "learning_rate": 5e-05, + "loss": 0.0912, + "num_input_tokens_seen": 486416564, + "step": 5016 + }, + { + "epoch": 0.49041845913179505, + "loss": 0.08064115047454834, + "loss_ce": 0.005293247755616903, + "loss_iou": 0.30859375, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 486416564, + "step": 5016 + }, + { + "epoch": 0.49051622995698085, + "grad_norm": 8.937176355407313, + "learning_rate": 5e-05, + "loss": 0.1205, + "num_input_tokens_seen": 486513452, + "step": 5017 + }, + { + "epoch": 0.49051622995698085, + "loss": 0.1412769854068756, + "loss_ce": 0.024928709492087364, + "loss_iou": 0.388671875, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 486513452, + "step": 5017 + }, + { + "epoch": 0.4906140007821666, + "grad_norm": 7.8607939091913135, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 486610776, + "step": 5018 + }, + { + "epoch": 0.4906140007821666, + "loss": 0.12636980414390564, + "loss_ce": 0.00780901126563549, + "loss_iou": 0.318359375, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 486610776, + "step": 5018 + }, + { + "epoch": 0.49071177160735235, + "grad_norm": 13.73973680246968, + "learning_rate": 5e-05, + "loss": 0.0992, + "num_input_tokens_seen": 486707484, + "step": 5019 + }, + { + "epoch": 0.49071177160735235, + "loss": 0.08642508089542389, + "loss_ce": 0.00300527922809124, + "loss_iou": 0.2265625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 486707484, + "step": 5019 + }, + { + "epoch": 0.4908095424325381, + "grad_norm": 13.65309794024901, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 486803992, + "step": 5020 + }, + { + "epoch": 0.4908095424325381, + "loss": 0.07690086960792542, + "loss_ce": 0.004391102120280266, + "loss_iou": 0.349609375, + "loss_num": 0.0145263671875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 486803992, + "step": 5020 + }, + { + "epoch": 0.4909073132577239, + "grad_norm": 11.151263702169128, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 486900884, + "step": 5021 + }, + { + "epoch": 0.4909073132577239, + "loss": 0.09085820615291595, + "loss_ce": 0.00389073695987463, + "loss_iou": 0.306640625, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 486900884, + "step": 5021 + }, + { + "epoch": 0.49100508408290966, + "grad_norm": 9.59950145415995, + "learning_rate": 5e-05, + "loss": 0.1075, + "num_input_tokens_seen": 486998692, + "step": 5022 + }, + { + "epoch": 0.49100508408290966, + "loss": 0.1273493468761444, + "loss_ce": 0.00396677665412426, + "loss_iou": 0.369140625, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 486998692, + "step": 5022 + }, + { + "epoch": 0.4911028549080954, + "grad_norm": 10.452121848238368, + "learning_rate": 5e-05, + "loss": 0.1023, + "num_input_tokens_seen": 487095696, + "step": 5023 + }, + { + "epoch": 0.4911028549080954, + "loss": 0.11246845126152039, + "loss_ce": 0.011516304686665535, + "loss_iou": 0.396484375, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 487095696, + "step": 5023 + }, + { + "epoch": 0.4912006257332812, + "grad_norm": 6.795496297153279, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 487193204, + "step": 5024 + }, + { + "epoch": 0.4912006257332812, + "loss": 0.09135628491640091, + "loss_ce": 0.008859643712639809, + "loss_iou": 0.310546875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 487193204, + "step": 5024 + }, + { + "epoch": 0.49129839655846697, + "grad_norm": 4.322307555502216, + "learning_rate": 5e-05, + "loss": 0.1014, + "num_input_tokens_seen": 487289992, + "step": 5025 + }, + { + "epoch": 0.49129839655846697, + "loss": 0.09081359952688217, + "loss_ce": 0.008637391030788422, + "loss_iou": 0.384765625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 487289992, + "step": 5025 + }, + { + "epoch": 0.4913961673836527, + "grad_norm": 8.949957610055705, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 487386520, + "step": 5026 + }, + { + "epoch": 0.4913961673836527, + "loss": 0.08735279738903046, + "loss_ce": 0.00593952601775527, + "loss_iou": 0.302734375, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 487386520, + "step": 5026 + }, + { + "epoch": 0.49149393820883847, + "grad_norm": 13.018606144840136, + "learning_rate": 5e-05, + "loss": 0.0763, + "num_input_tokens_seen": 487483876, + "step": 5027 + }, + { + "epoch": 0.49149393820883847, + "loss": 0.05762861669063568, + "loss_ce": 0.004581438843160868, + "loss_iou": 0.318359375, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 487483876, + "step": 5027 + }, + { + "epoch": 0.49159170903402427, + "grad_norm": 20.32142839328843, + "learning_rate": 5e-05, + "loss": 0.1084, + "num_input_tokens_seen": 487580272, + "step": 5028 + }, + { + "epoch": 0.49159170903402427, + "loss": 0.10374847054481506, + "loss_ce": 0.00597014557570219, + "loss_iou": 0.3359375, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 487580272, + "step": 5028 + }, + { + "epoch": 0.49168947985921, + "grad_norm": 5.251938502816426, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 487677592, + "step": 5029 + }, + { + "epoch": 0.49168947985921, + "loss": 0.05629223585128784, + "loss_ce": 0.005060852505266666, + "loss_iou": 0.3671875, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 487677592, + "step": 5029 + }, + { + "epoch": 0.49178725068439577, + "grad_norm": 10.019109667331987, + "learning_rate": 5e-05, + "loss": 0.0858, + "num_input_tokens_seen": 487775448, + "step": 5030 + }, + { + "epoch": 0.49178725068439577, + "loss": 0.07793810963630676, + "loss_ce": 0.001743348897434771, + "loss_iou": 0.2255859375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 487775448, + "step": 5030 + }, + { + "epoch": 0.4918850215095815, + "grad_norm": 5.580302278908023, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 487873084, + "step": 5031 + }, + { + "epoch": 0.4918850215095815, + "loss": 0.06498105823993683, + "loss_ce": 0.006349159404635429, + "loss_iou": 0.298828125, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 487873084, + "step": 5031 + }, + { + "epoch": 0.4919827923347673, + "grad_norm": 7.120624004577472, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 487969532, + "step": 5032 + }, + { + "epoch": 0.4919827923347673, + "loss": 0.06336764991283417, + "loss_ce": 0.002342033199965954, + "loss_iou": 0.345703125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 487969532, + "step": 5032 + }, + { + "epoch": 0.4920805631599531, + "grad_norm": 10.826265863876994, + "learning_rate": 5e-05, + "loss": 0.0598, + "num_input_tokens_seen": 488066652, + "step": 5033 + }, + { + "epoch": 0.4920805631599531, + "loss": 0.05243882164359093, + "loss_ce": 0.00776108680292964, + "loss_iou": 0.326171875, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 488066652, + "step": 5033 + }, + { + "epoch": 0.4921783339851388, + "grad_norm": 11.980796167776402, + "learning_rate": 5e-05, + "loss": 0.0991, + "num_input_tokens_seen": 488164012, + "step": 5034 + }, + { + "epoch": 0.4921783339851388, + "loss": 0.09365618228912354, + "loss_ce": 0.004575377330183983, + "loss_iou": 0.2138671875, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 488164012, + "step": 5034 + }, + { + "epoch": 0.4922761048103246, + "grad_norm": 6.774421961128103, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 488259636, + "step": 5035 + }, + { + "epoch": 0.4922761048103246, + "loss": 0.07936307787895203, + "loss_ce": 0.009035314433276653, + "loss_iou": 0.2060546875, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 488259636, + "step": 5035 + }, + { + "epoch": 0.4923738756355104, + "grad_norm": 6.872496690542118, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 488356660, + "step": 5036 + }, + { + "epoch": 0.4923738756355104, + "loss": 0.059598080813884735, + "loss_ce": 0.0016604592092335224, + "loss_iou": 0.328125, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 488356660, + "step": 5036 + }, + { + "epoch": 0.49247164646069613, + "grad_norm": 7.540018806209688, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 488454140, + "step": 5037 + }, + { + "epoch": 0.49247164646069613, + "loss": 0.07543186098337173, + "loss_ce": 0.001838723081164062, + "loss_iou": 0.32421875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 488454140, + "step": 5037 + }, + { + "epoch": 0.4925694172858819, + "grad_norm": 3.106384779497984, + "learning_rate": 5e-05, + "loss": 0.0595, + "num_input_tokens_seen": 488551224, + "step": 5038 + }, + { + "epoch": 0.4925694172858819, + "loss": 0.07198972254991531, + "loss_ce": 0.00631589675322175, + "loss_iou": 0.3203125, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 488551224, + "step": 5038 + }, + { + "epoch": 0.49266718811106763, + "grad_norm": 3.7005403166836373, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 488647696, + "step": 5039 + }, + { + "epoch": 0.49266718811106763, + "loss": 0.07696348428726196, + "loss_ce": 0.001455369871109724, + "loss_iou": 0.30078125, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 488647696, + "step": 5039 + }, + { + "epoch": 0.49276495893625344, + "grad_norm": 6.015037401568747, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 488744720, + "step": 5040 + }, + { + "epoch": 0.49276495893625344, + "loss": 0.11544855684041977, + "loss_ce": 0.005676827859133482, + "loss_iou": 0.2041015625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 488744720, + "step": 5040 + }, + { + "epoch": 0.4928627297614392, + "grad_norm": 7.366634239294012, + "learning_rate": 5e-05, + "loss": 0.0977, + "num_input_tokens_seen": 488841288, + "step": 5041 + }, + { + "epoch": 0.4928627297614392, + "loss": 0.1396579146385193, + "loss_ce": 0.008020333014428616, + "loss_iou": 0.27734375, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 488841288, + "step": 5041 + }, + { + "epoch": 0.49296050058662494, + "grad_norm": 3.1127471918585377, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 488938696, + "step": 5042 + }, + { + "epoch": 0.49296050058662494, + "loss": 0.08735176175832748, + "loss_ce": 0.004527050070464611, + "loss_iou": 0.3359375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 488938696, + "step": 5042 + }, + { + "epoch": 0.4930582714118107, + "grad_norm": 6.843405220603861, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 489036852, + "step": 5043 + }, + { + "epoch": 0.4930582714118107, + "loss": 0.059702519327402115, + "loss_ce": 0.0027910517528653145, + "loss_iou": 0.3984375, + "loss_num": 0.01141357421875, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 489036852, + "step": 5043 + }, + { + "epoch": 0.4931560422369965, + "grad_norm": 7.156360701975443, + "learning_rate": 5e-05, + "loss": 0.1171, + "num_input_tokens_seen": 489134060, + "step": 5044 + }, + { + "epoch": 0.4931560422369965, + "loss": 0.1173127144575119, + "loss_ce": 0.007769872434437275, + "loss_iou": 0.283203125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 489134060, + "step": 5044 + }, + { + "epoch": 0.49325381306218224, + "grad_norm": 20.871091654974503, + "learning_rate": 5e-05, + "loss": 0.094, + "num_input_tokens_seen": 489230664, + "step": 5045 + }, + { + "epoch": 0.49325381306218224, + "loss": 0.11277773976325989, + "loss_ce": 0.006118815392255783, + "loss_iou": 0.291015625, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 489230664, + "step": 5045 + }, + { + "epoch": 0.493351583887368, + "grad_norm": 24.71982629354857, + "learning_rate": 5e-05, + "loss": 0.1224, + "num_input_tokens_seen": 489327772, + "step": 5046 + }, + { + "epoch": 0.493351583887368, + "loss": 0.09567202627658844, + "loss_ce": 0.002807042095810175, + "loss_iou": 0.333984375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 489327772, + "step": 5046 + }, + { + "epoch": 0.4934493547125538, + "grad_norm": 16.717559774290883, + "learning_rate": 5e-05, + "loss": 0.106, + "num_input_tokens_seen": 489424488, + "step": 5047 + }, + { + "epoch": 0.4934493547125538, + "loss": 0.11092182993888855, + "loss_ce": 0.0028896043077111244, + "loss_iou": 0.3046875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 489424488, + "step": 5047 + }, + { + "epoch": 0.49354712553773955, + "grad_norm": 15.511445778378011, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 489522280, + "step": 5048 + }, + { + "epoch": 0.49354712553773955, + "loss": 0.07167229056358337, + "loss_ce": 0.007463305722922087, + "loss_iou": 0.318359375, + "loss_num": 0.0128173828125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 489522280, + "step": 5048 + }, + { + "epoch": 0.4936448963629253, + "grad_norm": 7.943027403738175, + "learning_rate": 5e-05, + "loss": 0.0922, + "num_input_tokens_seen": 489619908, + "step": 5049 + }, + { + "epoch": 0.4936448963629253, + "loss": 0.09412400424480438, + "loss_ce": 0.003486798144876957, + "loss_iou": 0.3359375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 489619908, + "step": 5049 + }, + { + "epoch": 0.49374266718811105, + "grad_norm": 24.491982619483426, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 489717024, + "step": 5050 + }, + { + "epoch": 0.49374266718811105, + "loss": 0.07049429416656494, + "loss_ce": 0.002928375732153654, + "loss_iou": 0.357421875, + "loss_num": 0.0135498046875, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 489717024, + "step": 5050 + }, + { + "epoch": 0.49384043801329686, + "grad_norm": 20.963186552883943, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 489814072, + "step": 5051 + }, + { + "epoch": 0.49384043801329686, + "loss": 0.08511725068092346, + "loss_ce": 0.00893012247979641, + "loss_iou": 0.27734375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 489814072, + "step": 5051 + }, + { + "epoch": 0.4939382088384826, + "grad_norm": 3.329052898994012, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 489911188, + "step": 5052 + }, + { + "epoch": 0.4939382088384826, + "loss": 0.08955571055412292, + "loss_ce": 0.005228015594184399, + "loss_iou": 0.361328125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 489911188, + "step": 5052 + }, + { + "epoch": 0.49403597966366836, + "grad_norm": 15.441125290467362, + "learning_rate": 5e-05, + "loss": 0.1052, + "num_input_tokens_seen": 490008488, + "step": 5053 + }, + { + "epoch": 0.49403597966366836, + "loss": 0.09935042262077332, + "loss_ce": 0.006729569286108017, + "loss_iou": 0.287109375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 490008488, + "step": 5053 + }, + { + "epoch": 0.4941337504888541, + "grad_norm": 27.85120893736861, + "learning_rate": 5e-05, + "loss": 0.0497, + "num_input_tokens_seen": 490105772, + "step": 5054 + }, + { + "epoch": 0.4941337504888541, + "loss": 0.05693519487977028, + "loss_ce": 0.002354505704715848, + "loss_iou": 0.353515625, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 490105772, + "step": 5054 + }, + { + "epoch": 0.4942315213140399, + "grad_norm": 8.243646464123685, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 490202956, + "step": 5055 + }, + { + "epoch": 0.4942315213140399, + "loss": 0.08936583995819092, + "loss_ce": 0.01130186952650547, + "loss_iou": 0.490234375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 490202956, + "step": 5055 + }, + { + "epoch": 0.49432929213922566, + "grad_norm": 5.5430375326716215, + "learning_rate": 5e-05, + "loss": 0.1043, + "num_input_tokens_seen": 490299308, + "step": 5056 + }, + { + "epoch": 0.49432929213922566, + "loss": 0.14311695098876953, + "loss_ce": 0.007885932922363281, + "loss_iou": 0.267578125, + "loss_num": 0.027099609375, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 490299308, + "step": 5056 + }, + { + "epoch": 0.4944270629644114, + "grad_norm": 6.078876556138949, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 490396392, + "step": 5057 + }, + { + "epoch": 0.4944270629644114, + "loss": 0.056640103459358215, + "loss_ce": 0.0027308010030537844, + "loss_iou": 0.34375, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 490396392, + "step": 5057 + }, + { + "epoch": 0.49452483378959716, + "grad_norm": 3.360048151305289, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 490493224, + "step": 5058 + }, + { + "epoch": 0.49452483378959716, + "loss": 0.08827437460422516, + "loss_ce": 0.0042747436091303825, + "loss_iou": 0.271484375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 490493224, + "step": 5058 + }, + { + "epoch": 0.49462260461478297, + "grad_norm": 9.64412220756981, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 490590060, + "step": 5059 + }, + { + "epoch": 0.49462260461478297, + "loss": 0.05905155837535858, + "loss_ce": 0.0022430862300097942, + "loss_iou": 0.421875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 490590060, + "step": 5059 + }, + { + "epoch": 0.4947203754399687, + "grad_norm": 4.7318139903507985, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 490686424, + "step": 5060 + }, + { + "epoch": 0.4947203754399687, + "loss": 0.07782427966594696, + "loss_ce": 0.0014845579862594604, + "loss_iou": 0.23046875, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 490686424, + "step": 5060 + }, + { + "epoch": 0.49481814626515447, + "grad_norm": 12.461442015882428, + "learning_rate": 5e-05, + "loss": 0.1072, + "num_input_tokens_seen": 490783332, + "step": 5061 + }, + { + "epoch": 0.49481814626515447, + "loss": 0.11832049489021301, + "loss_ce": 0.0071449605748057365, + "loss_iou": 0.33203125, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 490783332, + "step": 5061 + }, + { + "epoch": 0.4949159170903402, + "grad_norm": 14.1600777741945, + "learning_rate": 5e-05, + "loss": 0.0862, + "num_input_tokens_seen": 490881104, + "step": 5062 + }, + { + "epoch": 0.4949159170903402, + "loss": 0.0828174576163292, + "loss_ce": 0.008636854588985443, + "loss_iou": 0.28515625, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 490881104, + "step": 5062 + }, + { + "epoch": 0.495013687915526, + "grad_norm": 7.239807680846136, + "learning_rate": 5e-05, + "loss": 0.1173, + "num_input_tokens_seen": 490978792, + "step": 5063 + }, + { + "epoch": 0.495013687915526, + "loss": 0.08896671235561371, + "loss_ce": 0.003853186499327421, + "loss_iou": 0.46484375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 490978792, + "step": 5063 + }, + { + "epoch": 0.4951114587407118, + "grad_norm": 5.940405940426292, + "learning_rate": 5e-05, + "loss": 0.0681, + "num_input_tokens_seen": 491076136, + "step": 5064 + }, + { + "epoch": 0.4951114587407118, + "loss": 0.08134834468364716, + "loss_ce": 0.00416939239948988, + "loss_iou": 0.388671875, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 491076136, + "step": 5064 + }, + { + "epoch": 0.4952092295658975, + "grad_norm": 7.597555507580644, + "learning_rate": 5e-05, + "loss": 0.0703, + "num_input_tokens_seen": 491173360, + "step": 5065 + }, + { + "epoch": 0.4952092295658975, + "loss": 0.0651886835694313, + "loss_ce": 0.005755700170993805, + "loss_iou": 0.40625, + "loss_num": 0.01190185546875, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 491173360, + "step": 5065 + }, + { + "epoch": 0.4953070003910833, + "grad_norm": 13.886496155558234, + "learning_rate": 5e-05, + "loss": 0.1021, + "num_input_tokens_seen": 491270424, + "step": 5066 + }, + { + "epoch": 0.4953070003910833, + "loss": 0.11562605202198029, + "loss_ce": 0.0030467018950730562, + "loss_iou": 0.376953125, + "loss_num": 0.0224609375, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 491270424, + "step": 5066 + }, + { + "epoch": 0.4954047712162691, + "grad_norm": 5.240496857908634, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 491367448, + "step": 5067 + }, + { + "epoch": 0.4954047712162691, + "loss": 0.09968484938144684, + "loss_ce": 0.005995885469019413, + "loss_iou": 0.41796875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 491367448, + "step": 5067 + }, + { + "epoch": 0.49550254204145483, + "grad_norm": 3.8060022856468247, + "learning_rate": 5e-05, + "loss": 0.1056, + "num_input_tokens_seen": 491465224, + "step": 5068 + }, + { + "epoch": 0.49550254204145483, + "loss": 0.10139414668083191, + "loss_ce": 0.0022730508353561163, + "loss_iou": 0.33203125, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 491465224, + "step": 5068 + }, + { + "epoch": 0.4956003128666406, + "grad_norm": 10.592311526397534, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 491561492, + "step": 5069 + }, + { + "epoch": 0.4956003128666406, + "loss": 0.08701524883508682, + "loss_ce": 0.004903895314782858, + "loss_iou": 0.2265625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 491561492, + "step": 5069 + }, + { + "epoch": 0.4956980836918264, + "grad_norm": 14.511602726090272, + "learning_rate": 5e-05, + "loss": 0.104, + "num_input_tokens_seen": 491658688, + "step": 5070 + }, + { + "epoch": 0.4956980836918264, + "loss": 0.09391121566295624, + "loss_ce": 0.0064897965639829636, + "loss_iou": 0.10693359375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 491658688, + "step": 5070 + }, + { + "epoch": 0.49579585451701214, + "grad_norm": 17.4878061456798, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 491754676, + "step": 5071 + }, + { + "epoch": 0.49579585451701214, + "loss": 0.0682261660695076, + "loss_ce": 0.004658050369471312, + "loss_iou": 0.21875, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 491754676, + "step": 5071 + }, + { + "epoch": 0.4958936253421979, + "grad_norm": 22.547644399933418, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 491851416, + "step": 5072 + }, + { + "epoch": 0.4958936253421979, + "loss": 0.07512959837913513, + "loss_ce": 0.004389855079352856, + "loss_iou": 0.291015625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 491851416, + "step": 5072 + }, + { + "epoch": 0.49599139616738364, + "grad_norm": 4.91685669076715, + "learning_rate": 5e-05, + "loss": 0.0697, + "num_input_tokens_seen": 491948992, + "step": 5073 + }, + { + "epoch": 0.49599139616738364, + "loss": 0.06328277289867401, + "loss_ce": 0.011135859414935112, + "loss_iou": 0.26171875, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 491948992, + "step": 5073 + }, + { + "epoch": 0.49608916699256944, + "grad_norm": 5.762080038911986, + "learning_rate": 5e-05, + "loss": 0.0946, + "num_input_tokens_seen": 492045796, + "step": 5074 + }, + { + "epoch": 0.49608916699256944, + "loss": 0.07372479140758514, + "loss_ce": 0.00776105048134923, + "loss_iou": 0.224609375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 492045796, + "step": 5074 + }, + { + "epoch": 0.4961869378177552, + "grad_norm": 9.4151219261926, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 492143732, + "step": 5075 + }, + { + "epoch": 0.4961869378177552, + "loss": 0.08257646858692169, + "loss_ce": 0.00628252699971199, + "loss_iou": 0.34375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 492143732, + "step": 5075 + }, + { + "epoch": 0.49628470864294094, + "grad_norm": 7.295279574493191, + "learning_rate": 5e-05, + "loss": 0.0999, + "num_input_tokens_seen": 492240324, + "step": 5076 + }, + { + "epoch": 0.49628470864294094, + "loss": 0.1027425080537796, + "loss_ce": 0.003651923965662718, + "loss_iou": 0.275390625, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 492240324, + "step": 5076 + }, + { + "epoch": 0.4963824794681267, + "grad_norm": 6.264580770519361, + "learning_rate": 5e-05, + "loss": 0.1208, + "num_input_tokens_seen": 492336868, + "step": 5077 + }, + { + "epoch": 0.4963824794681267, + "loss": 0.1001351922750473, + "loss_ce": 0.004233698360621929, + "loss_iou": 0.2490234375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 492336868, + "step": 5077 + }, + { + "epoch": 0.4964802502933125, + "grad_norm": 3.2692437467410493, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 492433024, + "step": 5078 + }, + { + "epoch": 0.4964802502933125, + "loss": 0.11029671132564545, + "loss_ce": 0.0077118719927966595, + "loss_iou": 0.173828125, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 492433024, + "step": 5078 + }, + { + "epoch": 0.49657802111849825, + "grad_norm": 4.300603317694134, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 492530316, + "step": 5079 + }, + { + "epoch": 0.49657802111849825, + "loss": 0.08374562859535217, + "loss_ce": 0.0014320902992039919, + "loss_iou": 0.326171875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 492530316, + "step": 5079 + }, + { + "epoch": 0.496675791943684, + "grad_norm": 5.573989644803122, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 492627604, + "step": 5080 + }, + { + "epoch": 0.496675791943684, + "loss": 0.07498840987682343, + "loss_ce": 0.007956553250551224, + "loss_iou": 0.3359375, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 492627604, + "step": 5080 + }, + { + "epoch": 0.49677356276886975, + "grad_norm": 5.263309013462923, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 492725132, + "step": 5081 + }, + { + "epoch": 0.49677356276886975, + "loss": 0.06703421473503113, + "loss_ce": 0.006258456036448479, + "loss_iou": 0.328125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 492725132, + "step": 5081 + }, + { + "epoch": 0.49687133359405555, + "grad_norm": 3.872172366232047, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 492821704, + "step": 5082 + }, + { + "epoch": 0.49687133359405555, + "loss": 0.059821803122758865, + "loss_ce": 0.00380678940564394, + "loss_iou": 0.349609375, + "loss_num": 0.01123046875, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 492821704, + "step": 5082 + }, + { + "epoch": 0.4969691044192413, + "grad_norm": 20.926148426510096, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 492918764, + "step": 5083 + }, + { + "epoch": 0.4969691044192413, + "loss": 0.08724616467952728, + "loss_ce": 0.01148628257215023, + "loss_iou": 0.306640625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 492918764, + "step": 5083 + }, + { + "epoch": 0.49706687524442705, + "grad_norm": 9.17307176822998, + "learning_rate": 5e-05, + "loss": 0.0754, + "num_input_tokens_seen": 493014908, + "step": 5084 + }, + { + "epoch": 0.49706687524442705, + "loss": 0.09321482479572296, + "loss_ce": 0.008910015225410461, + "loss_iou": 0.291015625, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 493014908, + "step": 5084 + }, + { + "epoch": 0.4971646460696128, + "grad_norm": 6.338814924313622, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 493112472, + "step": 5085 + }, + { + "epoch": 0.4971646460696128, + "loss": 0.08130239695310593, + "loss_ce": 0.00709128100425005, + "loss_iou": 0.267578125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 493112472, + "step": 5085 + }, + { + "epoch": 0.4972624168947986, + "grad_norm": 8.306871926516637, + "learning_rate": 5e-05, + "loss": 0.0992, + "num_input_tokens_seen": 493209412, + "step": 5086 + }, + { + "epoch": 0.4972624168947986, + "loss": 0.08300751447677612, + "loss_ce": 0.009063420817255974, + "loss_iou": 0.283203125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 493209412, + "step": 5086 + }, + { + "epoch": 0.49736018771998436, + "grad_norm": 5.728781884245144, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 493305452, + "step": 5087 + }, + { + "epoch": 0.49736018771998436, + "loss": 0.07049891352653503, + "loss_ce": 0.010760755278170109, + "loss_iou": 0.181640625, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 493305452, + "step": 5087 + }, + { + "epoch": 0.4974579585451701, + "grad_norm": 35.42698272554856, + "learning_rate": 5e-05, + "loss": 0.1006, + "num_input_tokens_seen": 493401972, + "step": 5088 + }, + { + "epoch": 0.4974579585451701, + "loss": 0.09334485232830048, + "loss_ce": 0.005149052478373051, + "loss_iou": 0.1953125, + "loss_num": 0.017578125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 493401972, + "step": 5088 + }, + { + "epoch": 0.49755572937035586, + "grad_norm": 65.59262239130922, + "learning_rate": 5e-05, + "loss": 0.1007, + "num_input_tokens_seen": 493498772, + "step": 5089 + }, + { + "epoch": 0.49755572937035586, + "loss": 0.06740380078554153, + "loss_ce": 0.004621513187885284, + "loss_iou": 0.283203125, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 493498772, + "step": 5089 + }, + { + "epoch": 0.49765350019554166, + "grad_norm": 25.476148668690723, + "learning_rate": 5e-05, + "loss": 0.097, + "num_input_tokens_seen": 493595432, + "step": 5090 + }, + { + "epoch": 0.49765350019554166, + "loss": 0.0972350463271141, + "loss_ce": 0.004045810084789991, + "loss_iou": 0.2890625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 493595432, + "step": 5090 + }, + { + "epoch": 0.4977512710207274, + "grad_norm": 3.3889445017613333, + "learning_rate": 5e-05, + "loss": 0.0554, + "num_input_tokens_seen": 493692860, + "step": 5091 + }, + { + "epoch": 0.4977512710207274, + "loss": 0.0483933724462986, + "loss_ce": 0.005577208939939737, + "loss_iou": 0.36328125, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 493692860, + "step": 5091 + }, + { + "epoch": 0.49784904184591317, + "grad_norm": 8.116148038292412, + "learning_rate": 5e-05, + "loss": 0.0458, + "num_input_tokens_seen": 493790068, + "step": 5092 + }, + { + "epoch": 0.49784904184591317, + "loss": 0.04349170997738838, + "loss_ce": 0.002415051218122244, + "loss_iou": 0.478515625, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 493790068, + "step": 5092 + }, + { + "epoch": 0.49794681267109897, + "grad_norm": 2.783008161610162, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 493886440, + "step": 5093 + }, + { + "epoch": 0.49794681267109897, + "loss": 0.070146843791008, + "loss_ce": 0.0038626673631370068, + "loss_iou": 0.34375, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 493886440, + "step": 5093 + }, + { + "epoch": 0.4980445834962847, + "grad_norm": 5.149331535552418, + "learning_rate": 5e-05, + "loss": 0.0989, + "num_input_tokens_seen": 493982500, + "step": 5094 + }, + { + "epoch": 0.4980445834962847, + "loss": 0.09598512947559357, + "loss_ce": 0.0035168700851500034, + "loss_iou": 0.228515625, + "loss_num": 0.0185546875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 493982500, + "step": 5094 + }, + { + "epoch": 0.49814235432147047, + "grad_norm": 7.110217558560749, + "learning_rate": 5e-05, + "loss": 0.0952, + "num_input_tokens_seen": 494079320, + "step": 5095 + }, + { + "epoch": 0.49814235432147047, + "loss": 0.07057134807109833, + "loss_ce": 0.004729672335088253, + "loss_iou": 0.212890625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 494079320, + "step": 5095 + }, + { + "epoch": 0.4982401251466562, + "grad_norm": 5.713554764958665, + "learning_rate": 5e-05, + "loss": 0.0522, + "num_input_tokens_seen": 494176492, + "step": 5096 + }, + { + "epoch": 0.4982401251466562, + "loss": 0.048969678580760956, + "loss_ce": 0.006411009468138218, + "loss_iou": 0.298828125, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 494176492, + "step": 5096 + }, + { + "epoch": 0.498337895971842, + "grad_norm": 3.0540296137246323, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 494273812, + "step": 5097 + }, + { + "epoch": 0.498337895971842, + "loss": 0.08306469023227692, + "loss_ce": 0.001323349541053176, + "loss_iou": 0.2734375, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 494273812, + "step": 5097 + }, + { + "epoch": 0.4984356667970278, + "grad_norm": 10.480482061216987, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 494370728, + "step": 5098 + }, + { + "epoch": 0.4984356667970278, + "loss": 0.07073365151882172, + "loss_ce": 0.008584599941968918, + "loss_iou": 0.330078125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 494370728, + "step": 5098 + }, + { + "epoch": 0.4985334376222135, + "grad_norm": 6.922589408504166, + "learning_rate": 5e-05, + "loss": 0.0912, + "num_input_tokens_seen": 494467492, + "step": 5099 + }, + { + "epoch": 0.4985334376222135, + "loss": 0.06617486476898193, + "loss_ce": 0.00336206192150712, + "loss_iou": 0.171875, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 494467492, + "step": 5099 + }, + { + "epoch": 0.4986312084473993, + "grad_norm": 3.534764638771576, + "learning_rate": 5e-05, + "loss": 0.1, + "num_input_tokens_seen": 494564944, + "step": 5100 + }, + { + "epoch": 0.4986312084473993, + "loss": 0.07472109794616699, + "loss_ce": 0.00422549108043313, + "loss_iou": 0.265625, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 494564944, + "step": 5100 + }, + { + "epoch": 0.4987289792725851, + "grad_norm": 17.228456800443695, + "learning_rate": 5e-05, + "loss": 0.0893, + "num_input_tokens_seen": 494661220, + "step": 5101 + }, + { + "epoch": 0.4987289792725851, + "loss": 0.08759040385484695, + "loss_ce": 0.003300851210951805, + "loss_iou": 0.25, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 494661220, + "step": 5101 + }, + { + "epoch": 0.49882675009777083, + "grad_norm": 10.183444525750643, + "learning_rate": 5e-05, + "loss": 0.0691, + "num_input_tokens_seen": 494758948, + "step": 5102 + }, + { + "epoch": 0.49882675009777083, + "loss": 0.06849314272403717, + "loss_ce": 0.005306490231305361, + "loss_iou": 0.36328125, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 494758948, + "step": 5102 + }, + { + "epoch": 0.4989245209229566, + "grad_norm": 23.713479092279137, + "learning_rate": 5e-05, + "loss": 0.1117, + "num_input_tokens_seen": 494856404, + "step": 5103 + }, + { + "epoch": 0.4989245209229566, + "loss": 0.10764788091182709, + "loss_ce": 0.0062455954030156136, + "loss_iou": 0.302734375, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 494856404, + "step": 5103 + }, + { + "epoch": 0.49902229174814233, + "grad_norm": 6.324060193239204, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 494953352, + "step": 5104 + }, + { + "epoch": 0.49902229174814233, + "loss": 0.06449876725673676, + "loss_ce": 0.006133895833045244, + "loss_iou": 0.302734375, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 494953352, + "step": 5104 + }, + { + "epoch": 0.49912006257332814, + "grad_norm": 3.8786025579604795, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 495050212, + "step": 5105 + }, + { + "epoch": 0.49912006257332814, + "loss": 0.10816696286201477, + "loss_ce": 0.007764134090393782, + "loss_iou": 0.396484375, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 495050212, + "step": 5105 + }, + { + "epoch": 0.4992178333985139, + "grad_norm": 11.69064736869847, + "learning_rate": 5e-05, + "loss": 0.0944, + "num_input_tokens_seen": 495147292, + "step": 5106 + }, + { + "epoch": 0.4992178333985139, + "loss": 0.1411350518465042, + "loss_ce": 0.00591165479272604, + "loss_iou": 0.404296875, + "loss_num": 0.027099609375, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 495147292, + "step": 5106 + }, + { + "epoch": 0.49931560422369964, + "grad_norm": 4.342517329552408, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 495243660, + "step": 5107 + }, + { + "epoch": 0.49931560422369964, + "loss": 0.08930978178977966, + "loss_ce": 0.006401157937943935, + "loss_iou": 0.2490234375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 495243660, + "step": 5107 + }, + { + "epoch": 0.4994133750488854, + "grad_norm": 6.8176290961175186, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 495340996, + "step": 5108 + }, + { + "epoch": 0.4994133750488854, + "loss": 0.11062556505203247, + "loss_ce": 0.006209671031683683, + "loss_iou": 0.3125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 495340996, + "step": 5108 + }, + { + "epoch": 0.4995111458740712, + "grad_norm": 17.177252968946036, + "learning_rate": 5e-05, + "loss": 0.107, + "num_input_tokens_seen": 495438300, + "step": 5109 + }, + { + "epoch": 0.4995111458740712, + "loss": 0.0897057056427002, + "loss_ce": 0.002745866309851408, + "loss_iou": 0.3125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 495438300, + "step": 5109 + }, + { + "epoch": 0.49960891669925694, + "grad_norm": 13.86792413290348, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 495534344, + "step": 5110 + }, + { + "epoch": 0.49960891669925694, + "loss": 0.09596842527389526, + "loss_ce": 0.005987341515719891, + "loss_iou": 0.2890625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 495534344, + "step": 5110 + }, + { + "epoch": 0.4997066875244427, + "grad_norm": 8.23480446135195, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 495631068, + "step": 5111 + }, + { + "epoch": 0.4997066875244427, + "loss": 0.088499054312706, + "loss_ce": 0.005201320629566908, + "loss_iou": 0.30078125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 495631068, + "step": 5111 + }, + { + "epoch": 0.49980445834962844, + "grad_norm": 8.069109629549194, + "learning_rate": 5e-05, + "loss": 0.0645, + "num_input_tokens_seen": 495728228, + "step": 5112 + }, + { + "epoch": 0.49980445834962844, + "loss": 0.06694328784942627, + "loss_ce": 0.007022019010037184, + "loss_iou": 0.3359375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 495728228, + "step": 5112 + }, + { + "epoch": 0.49990222917481425, + "grad_norm": 3.9393746594103654, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 495825652, + "step": 5113 + }, + { + "epoch": 0.49990222917481425, + "loss": 0.0911509245634079, + "loss_ce": 0.004221603274345398, + "loss_iou": 0.341796875, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 495825652, + "step": 5113 + }, + { + "epoch": 0.5, + "grad_norm": 5.455682968305806, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 495922820, + "step": 5114 + }, + { + "epoch": 0.5, + "loss": 0.0876055359840393, + "loss_ce": 0.0044146133586764336, + "loss_iou": 0.359375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 495922820, + "step": 5114 + }, + { + "epoch": 0.5000977708251858, + "grad_norm": 5.740083505086736, + "learning_rate": 5e-05, + "loss": 0.0912, + "num_input_tokens_seen": 496019616, + "step": 5115 + }, + { + "epoch": 0.5000977708251858, + "loss": 0.10965240746736526, + "loss_ce": 0.003954775165766478, + "loss_iou": 0.2578125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 496019616, + "step": 5115 + }, + { + "epoch": 0.5001955416503715, + "grad_norm": 5.862818797408844, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 496117016, + "step": 5116 + }, + { + "epoch": 0.5001955416503715, + "loss": 0.06216675415635109, + "loss_ce": 0.0033441237173974514, + "loss_iou": 0.361328125, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 496117016, + "step": 5116 + }, + { + "epoch": 0.5002933124755573, + "grad_norm": 10.937459683145947, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 496214172, + "step": 5117 + }, + { + "epoch": 0.5002933124755573, + "loss": 0.06389784067869186, + "loss_ce": 0.0015656861942261457, + "loss_iou": 0.26953125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 496214172, + "step": 5117 + }, + { + "epoch": 0.5003910833007431, + "grad_norm": 12.99647359314032, + "learning_rate": 5e-05, + "loss": 0.0685, + "num_input_tokens_seen": 496310680, + "step": 5118 + }, + { + "epoch": 0.5003910833007431, + "loss": 0.06221940740942955, + "loss_ce": 0.00344255194067955, + "loss_iou": 0.453125, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 496310680, + "step": 5118 + }, + { + "epoch": 0.5004888541259288, + "grad_norm": 4.349638991133239, + "learning_rate": 5e-05, + "loss": 0.0682, + "num_input_tokens_seen": 496408180, + "step": 5119 + }, + { + "epoch": 0.5004888541259288, + "loss": 0.06303223222494125, + "loss_ce": 0.005298697389662266, + "loss_iou": 0.279296875, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 496408180, + "step": 5119 + }, + { + "epoch": 0.5005866249511146, + "grad_norm": 3.7242370664340374, + "learning_rate": 5e-05, + "loss": 0.067, + "num_input_tokens_seen": 496505544, + "step": 5120 + }, + { + "epoch": 0.5005866249511146, + "loss": 0.03728558123111725, + "loss_ce": 0.003258479293435812, + "loss_iou": 0.412109375, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 496505544, + "step": 5120 + }, + { + "epoch": 0.5006843957763003, + "grad_norm": 4.957755193397547, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 496602756, + "step": 5121 + }, + { + "epoch": 0.5006843957763003, + "loss": 0.0721515566110611, + "loss_ce": 0.002518067369237542, + "loss_iou": 0.390625, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 496602756, + "step": 5121 + }, + { + "epoch": 0.5007821666014861, + "grad_norm": 3.6913125191777327, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 496699352, + "step": 5122 + }, + { + "epoch": 0.5007821666014861, + "loss": 0.07311592251062393, + "loss_ce": 0.006435013376176357, + "loss_iou": 0.3203125, + "loss_num": 0.0133056640625, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 496699352, + "step": 5122 + }, + { + "epoch": 0.5008799374266719, + "grad_norm": 6.676094451693893, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 496796096, + "step": 5123 + }, + { + "epoch": 0.5008799374266719, + "loss": 0.09659025073051453, + "loss_ce": 0.005190104246139526, + "loss_iou": 0.26953125, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 496796096, + "step": 5123 + }, + { + "epoch": 0.5009777082518576, + "grad_norm": 18.041931302758247, + "learning_rate": 5e-05, + "loss": 0.1185, + "num_input_tokens_seen": 496893236, + "step": 5124 + }, + { + "epoch": 0.5009777082518576, + "loss": 0.14092057943344116, + "loss_ce": 0.004690106492489576, + "loss_iou": 0.322265625, + "loss_num": 0.0272216796875, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 496893236, + "step": 5124 + }, + { + "epoch": 0.5010754790770434, + "grad_norm": 24.878428248472797, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 496989196, + "step": 5125 + }, + { + "epoch": 0.5010754790770434, + "loss": 0.07279390841722488, + "loss_ce": 0.0071353367529809475, + "loss_iou": 0.2734375, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 496989196, + "step": 5125 + }, + { + "epoch": 0.5011732499022292, + "grad_norm": 6.602317035795599, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 497086884, + "step": 5126 + }, + { + "epoch": 0.5011732499022292, + "loss": 0.08844177424907684, + "loss_ce": 0.004060679115355015, + "loss_iou": 0.484375, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 497086884, + "step": 5126 + }, + { + "epoch": 0.5012710207274149, + "grad_norm": 3.5542512782586186, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 497184128, + "step": 5127 + }, + { + "epoch": 0.5012710207274149, + "loss": 0.06251540780067444, + "loss_ce": 0.00798049196600914, + "loss_iou": 0.37890625, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 497184128, + "step": 5127 + }, + { + "epoch": 0.5013687915526007, + "grad_norm": 4.792176129288096, + "learning_rate": 5e-05, + "loss": 0.0685, + "num_input_tokens_seen": 497281108, + "step": 5128 + }, + { + "epoch": 0.5013687915526007, + "loss": 0.09638041257858276, + "loss_ce": 0.004705609753727913, + "loss_iou": 0.34375, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 497281108, + "step": 5128 + }, + { + "epoch": 0.5014665623777864, + "grad_norm": 3.166824001380504, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 497377936, + "step": 5129 + }, + { + "epoch": 0.5014665623777864, + "loss": 0.07327224314212799, + "loss_ce": 0.005263822618871927, + "loss_iou": 0.251953125, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 497377936, + "step": 5129 + }, + { + "epoch": 0.5015643332029722, + "grad_norm": 6.201306697448449, + "learning_rate": 5e-05, + "loss": 0.1177, + "num_input_tokens_seen": 497475916, + "step": 5130 + }, + { + "epoch": 0.5015643332029722, + "loss": 0.1754072606563568, + "loss_ce": 0.00539384875446558, + "loss_iou": 0.392578125, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 497475916, + "step": 5130 + }, + { + "epoch": 0.501662104028158, + "grad_norm": 7.976437807089886, + "learning_rate": 5e-05, + "loss": 0.0996, + "num_input_tokens_seen": 497572636, + "step": 5131 + }, + { + "epoch": 0.501662104028158, + "loss": 0.06180195510387421, + "loss_ce": 0.007511180825531483, + "loss_iou": 0.353515625, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 497572636, + "step": 5131 + }, + { + "epoch": 0.5017598748533437, + "grad_norm": 13.382430093010226, + "learning_rate": 5e-05, + "loss": 0.0914, + "num_input_tokens_seen": 497670944, + "step": 5132 + }, + { + "epoch": 0.5017598748533437, + "loss": 0.11343206465244293, + "loss_ce": 0.004362242761999369, + "loss_iou": 0.408203125, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 497670944, + "step": 5132 + }, + { + "epoch": 0.5018576456785295, + "grad_norm": 3.2536969091433794, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 497767444, + "step": 5133 + }, + { + "epoch": 0.5018576456785295, + "loss": 0.10783983767032623, + "loss_ce": 0.005468618590384722, + "loss_iou": 0.32421875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 497767444, + "step": 5133 + }, + { + "epoch": 0.5019554165037153, + "grad_norm": 5.927562189609695, + "learning_rate": 5e-05, + "loss": 0.048, + "num_input_tokens_seen": 497865400, + "step": 5134 + }, + { + "epoch": 0.5019554165037153, + "loss": 0.038356564939022064, + "loss_ce": 0.0008046846487559378, + "loss_iou": 0.390625, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 497865400, + "step": 5134 + }, + { + "epoch": 0.502053187328901, + "grad_norm": 3.605718172261692, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 497962020, + "step": 5135 + }, + { + "epoch": 0.502053187328901, + "loss": 0.10232483595609665, + "loss_ce": 0.007872930727899075, + "loss_iou": 0.33203125, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 497962020, + "step": 5135 + }, + { + "epoch": 0.5021509581540868, + "grad_norm": 5.7443257076491525, + "learning_rate": 5e-05, + "loss": 0.0565, + "num_input_tokens_seen": 498058736, + "step": 5136 + }, + { + "epoch": 0.5021509581540868, + "loss": 0.04781100153923035, + "loss_ce": 0.008489102125167847, + "loss_iou": 0.3359375, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 498058736, + "step": 5136 + }, + { + "epoch": 0.5022487289792725, + "grad_norm": 4.7191222769528975, + "learning_rate": 5e-05, + "loss": 0.0565, + "num_input_tokens_seen": 498155252, + "step": 5137 + }, + { + "epoch": 0.5022487289792725, + "loss": 0.06813503801822662, + "loss_ce": 0.005970727652311325, + "loss_iou": 0.232421875, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 498155252, + "step": 5137 + }, + { + "epoch": 0.5023464998044583, + "grad_norm": 3.7886226299790664, + "learning_rate": 5e-05, + "loss": 0.0539, + "num_input_tokens_seen": 498251304, + "step": 5138 + }, + { + "epoch": 0.5023464998044583, + "loss": 0.06781886518001556, + "loss_ce": 0.007348287850618362, + "loss_iou": 0.2099609375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 498251304, + "step": 5138 + }, + { + "epoch": 0.5024442706296441, + "grad_norm": 18.456719212074745, + "learning_rate": 5e-05, + "loss": 0.0821, + "num_input_tokens_seen": 498348092, + "step": 5139 + }, + { + "epoch": 0.5024442706296441, + "loss": 0.07294204086065292, + "loss_ce": 0.007970120757818222, + "loss_iou": 0.2373046875, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 498348092, + "step": 5139 + }, + { + "epoch": 0.5025420414548298, + "grad_norm": 11.09208087091205, + "learning_rate": 5e-05, + "loss": 0.096, + "num_input_tokens_seen": 498444796, + "step": 5140 + }, + { + "epoch": 0.5025420414548298, + "loss": 0.06137245148420334, + "loss_ce": 0.005960162729024887, + "loss_iou": 0.259765625, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 498444796, + "step": 5140 + }, + { + "epoch": 0.5026398122800156, + "grad_norm": 9.30276815304342, + "learning_rate": 5e-05, + "loss": 0.075, + "num_input_tokens_seen": 498542540, + "step": 5141 + }, + { + "epoch": 0.5026398122800156, + "loss": 0.09055538475513458, + "loss_ce": 0.009103966876864433, + "loss_iou": 0.294921875, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 498542540, + "step": 5141 + }, + { + "epoch": 0.5027375831052014, + "grad_norm": 26.233713685046038, + "learning_rate": 5e-05, + "loss": 0.1024, + "num_input_tokens_seen": 498638980, + "step": 5142 + }, + { + "epoch": 0.5027375831052014, + "loss": 0.0484958253800869, + "loss_ce": 0.00455051101744175, + "loss_iou": 0.41796875, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 498638980, + "step": 5142 + }, + { + "epoch": 0.5028353539303871, + "grad_norm": 10.466007626829509, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 498736792, + "step": 5143 + }, + { + "epoch": 0.5028353539303871, + "loss": 0.04758021980524063, + "loss_ce": 0.0016360052395612001, + "loss_iou": 0.416015625, + "loss_num": 0.00921630859375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 498736792, + "step": 5143 + }, + { + "epoch": 0.502933124755573, + "grad_norm": 15.962638621834733, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 498834344, + "step": 5144 + }, + { + "epoch": 0.502933124755573, + "loss": 0.06088060513138771, + "loss_ce": 0.0041942051611840725, + "loss_iou": 0.45703125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 498834344, + "step": 5144 + }, + { + "epoch": 0.5030308955807588, + "grad_norm": 4.971380153911146, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 498931800, + "step": 5145 + }, + { + "epoch": 0.5030308955807588, + "loss": 0.09570352733135223, + "loss_ce": 0.005249423906207085, + "loss_iou": 0.357421875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 498931800, + "step": 5145 + }, + { + "epoch": 0.5031286664059444, + "grad_norm": 1.686155552897226, + "learning_rate": 5e-05, + "loss": 0.0892, + "num_input_tokens_seen": 499028968, + "step": 5146 + }, + { + "epoch": 0.5031286664059444, + "loss": 0.10308566689491272, + "loss_ce": 0.006978180259466171, + "loss_iou": 0.30078125, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 499028968, + "step": 5146 + }, + { + "epoch": 0.5032264372311303, + "grad_norm": 1.9049926845393859, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 499126564, + "step": 5147 + }, + { + "epoch": 0.5032264372311303, + "loss": 0.04488144814968109, + "loss_ce": 0.0024620145559310913, + "loss_iou": 0.275390625, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 499126564, + "step": 5147 + }, + { + "epoch": 0.503324208056316, + "grad_norm": 21.687651839319525, + "learning_rate": 5e-05, + "loss": 0.0655, + "num_input_tokens_seen": 499224824, + "step": 5148 + }, + { + "epoch": 0.503324208056316, + "loss": 0.07291342318058014, + "loss_ce": 0.0036079958081245422, + "loss_iou": 0.30859375, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 499224824, + "step": 5148 + }, + { + "epoch": 0.5034219788815018, + "grad_norm": 13.10317249824764, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 499321236, + "step": 5149 + }, + { + "epoch": 0.5034219788815018, + "loss": 0.10047444701194763, + "loss_ce": 0.004710285924375057, + "loss_iou": 0.267578125, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 499321236, + "step": 5149 + }, + { + "epoch": 0.5035197497066876, + "grad_norm": 3.1511446490567203, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 499417740, + "step": 5150 + }, + { + "epoch": 0.5035197497066876, + "loss": 0.06991134583950043, + "loss_ce": 0.004878382198512554, + "loss_iou": 0.21875, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 499417740, + "step": 5150 + }, + { + "epoch": 0.5036175205318733, + "grad_norm": 5.6659961710244655, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 499514440, + "step": 5151 + }, + { + "epoch": 0.5036175205318733, + "loss": 0.1162739172577858, + "loss_ce": 0.0038547867443412542, + "loss_iou": 0.26953125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 499514440, + "step": 5151 + }, + { + "epoch": 0.5037152913570591, + "grad_norm": 4.1548189838286715, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 499611820, + "step": 5152 + }, + { + "epoch": 0.5037152913570591, + "loss": 0.07589968293905258, + "loss_ce": 0.010561545379459858, + "loss_iou": 0.291015625, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 499611820, + "step": 5152 + }, + { + "epoch": 0.5038130621822449, + "grad_norm": 6.3967092690719, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 499709124, + "step": 5153 + }, + { + "epoch": 0.5038130621822449, + "loss": 0.08088874816894531, + "loss_ce": 0.004915233701467514, + "loss_iou": 0.365234375, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 499709124, + "step": 5153 + }, + { + "epoch": 0.5039108330074306, + "grad_norm": 13.343101310780018, + "learning_rate": 5e-05, + "loss": 0.0821, + "num_input_tokens_seen": 499806380, + "step": 5154 + }, + { + "epoch": 0.5039108330074306, + "loss": 0.08290202915668488, + "loss_ce": 0.0037699462845921516, + "loss_iou": 0.25, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 499806380, + "step": 5154 + }, + { + "epoch": 0.5040086038326164, + "grad_norm": 19.252118129692803, + "learning_rate": 5e-05, + "loss": 0.0974, + "num_input_tokens_seen": 499903164, + "step": 5155 + }, + { + "epoch": 0.5040086038326164, + "loss": 0.13149648904800415, + "loss_ce": 0.00674062455072999, + "loss_iou": 0.3046875, + "loss_num": 0.02490234375, + "loss_xval": 0.125, + "num_input_tokens_seen": 499903164, + "step": 5155 + }, + { + "epoch": 0.5041063746578021, + "grad_norm": 3.7765218050957015, + "learning_rate": 5e-05, + "loss": 0.0681, + "num_input_tokens_seen": 499999384, + "step": 5156 + }, + { + "epoch": 0.5041063746578021, + "loss": 0.06046546995639801, + "loss_ce": 0.004999772645533085, + "loss_iou": 0.248046875, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 499999384, + "step": 5156 + }, + { + "epoch": 0.5042041454829879, + "grad_norm": 4.330522989035902, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 500095700, + "step": 5157 + }, + { + "epoch": 0.5042041454829879, + "loss": 0.1392332911491394, + "loss_ce": 0.0013701333664357662, + "loss_iou": 0.322265625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 500095700, + "step": 5157 + }, + { + "epoch": 0.5043019163081737, + "grad_norm": 3.9121109916623182, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 500193012, + "step": 5158 + }, + { + "epoch": 0.5043019163081737, + "loss": 0.08045527338981628, + "loss_ce": 0.003314471337944269, + "loss_iou": 0.232421875, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 500193012, + "step": 5158 + }, + { + "epoch": 0.5043996871333594, + "grad_norm": 12.787876946901443, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 500290908, + "step": 5159 + }, + { + "epoch": 0.5043996871333594, + "loss": 0.062316082417964935, + "loss_ce": 0.004866741597652435, + "loss_iou": 0.353515625, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 500290908, + "step": 5159 + }, + { + "epoch": 0.5044974579585452, + "grad_norm": 20.015686409527483, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 500388060, + "step": 5160 + }, + { + "epoch": 0.5044974579585452, + "loss": 0.06914086639881134, + "loss_ce": 0.0029940204694867134, + "loss_iou": 0.2431640625, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 500388060, + "step": 5160 + }, + { + "epoch": 0.504595228783731, + "grad_norm": 2.857459610695181, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 500484036, + "step": 5161 + }, + { + "epoch": 0.504595228783731, + "loss": 0.06359679251909256, + "loss_ce": 0.00531203206628561, + "loss_iou": 0.2734375, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 500484036, + "step": 5161 + }, + { + "epoch": 0.5046929996089167, + "grad_norm": 6.185645154842376, + "learning_rate": 5e-05, + "loss": 0.052, + "num_input_tokens_seen": 500581324, + "step": 5162 + }, + { + "epoch": 0.5046929996089167, + "loss": 0.04083889350295067, + "loss_ce": 0.009146389551460743, + "loss_iou": 0.291015625, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 500581324, + "step": 5162 + }, + { + "epoch": 0.5047907704341025, + "grad_norm": 8.583582079563604, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 500678056, + "step": 5163 + }, + { + "epoch": 0.5047907704341025, + "loss": 0.08722253888845444, + "loss_ce": 0.005114991217851639, + "loss_iou": 0.337890625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 500678056, + "step": 5163 + }, + { + "epoch": 0.5048885412592883, + "grad_norm": 19.305276965877145, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 500773996, + "step": 5164 + }, + { + "epoch": 0.5048885412592883, + "loss": 0.0716397762298584, + "loss_ce": 0.0030057397671043873, + "loss_iou": 0.267578125, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 500773996, + "step": 5164 + }, + { + "epoch": 0.504986312084474, + "grad_norm": 18.7235991521747, + "learning_rate": 5e-05, + "loss": 0.1099, + "num_input_tokens_seen": 500870844, + "step": 5165 + }, + { + "epoch": 0.504986312084474, + "loss": 0.09865550696849823, + "loss_ce": 0.008994857780635357, + "loss_iou": 0.302734375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 500870844, + "step": 5165 + }, + { + "epoch": 0.5050840829096598, + "grad_norm": 3.7032019551317745, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 500967584, + "step": 5166 + }, + { + "epoch": 0.5050840829096598, + "loss": 0.08946127444505692, + "loss_ce": 0.0025472105480730534, + "loss_iou": 0.326171875, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 500967584, + "step": 5166 + }, + { + "epoch": 0.5051818537348455, + "grad_norm": 5.737762676846418, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 501064680, + "step": 5167 + }, + { + "epoch": 0.5051818537348455, + "loss": 0.10512275248765945, + "loss_ce": 0.0056507037952542305, + "loss_iou": 0.318359375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 501064680, + "step": 5167 + }, + { + "epoch": 0.5052796245600313, + "grad_norm": 9.726878297531696, + "learning_rate": 5e-05, + "loss": 0.1108, + "num_input_tokens_seen": 501161508, + "step": 5168 + }, + { + "epoch": 0.5052796245600313, + "loss": 0.1339205652475357, + "loss_ce": 0.003793608397245407, + "loss_iou": 0.306640625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 501161508, + "step": 5168 + }, + { + "epoch": 0.5053773953852171, + "grad_norm": 10.31508242281127, + "learning_rate": 5e-05, + "loss": 0.1023, + "num_input_tokens_seen": 501258652, + "step": 5169 + }, + { + "epoch": 0.5053773953852171, + "loss": 0.13063928484916687, + "loss_ce": 0.005395154934376478, + "loss_iou": 0.275390625, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 501258652, + "step": 5169 + }, + { + "epoch": 0.5054751662104028, + "grad_norm": 3.864827750927423, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 501355332, + "step": 5170 + }, + { + "epoch": 0.5054751662104028, + "loss": 0.05840514600276947, + "loss_ce": 0.0037329052574932575, + "loss_iou": 0.29296875, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 501355332, + "step": 5170 + }, + { + "epoch": 0.5055729370355886, + "grad_norm": 4.4434334783456455, + "learning_rate": 5e-05, + "loss": 0.0513, + "num_input_tokens_seen": 501452168, + "step": 5171 + }, + { + "epoch": 0.5055729370355886, + "loss": 0.03890141844749451, + "loss_ce": 0.008406732231378555, + "loss_iou": 0.251953125, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 501452168, + "step": 5171 + }, + { + "epoch": 0.5056707078607744, + "grad_norm": 32.90176813836075, + "learning_rate": 5e-05, + "loss": 0.0643, + "num_input_tokens_seen": 501549640, + "step": 5172 + }, + { + "epoch": 0.5056707078607744, + "loss": 0.0661751925945282, + "loss_ce": 0.007886619307100773, + "loss_iou": 0.330078125, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 501549640, + "step": 5172 + }, + { + "epoch": 0.5057684786859601, + "grad_norm": 58.85725683687962, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 501646860, + "step": 5173 + }, + { + "epoch": 0.5057684786859601, + "loss": 0.08587366342544556, + "loss_ce": 0.006466922350227833, + "loss_iou": 0.3203125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 501646860, + "step": 5173 + }, + { + "epoch": 0.5058662495111459, + "grad_norm": 14.868116911551867, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 501743908, + "step": 5174 + }, + { + "epoch": 0.5058662495111459, + "loss": 0.07165150344371796, + "loss_ce": 0.005176592152565718, + "loss_iou": 0.314453125, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 501743908, + "step": 5174 + }, + { + "epoch": 0.5059640203363316, + "grad_norm": 5.175783883134724, + "learning_rate": 5e-05, + "loss": 0.0587, + "num_input_tokens_seen": 501840028, + "step": 5175 + }, + { + "epoch": 0.5059640203363316, + "loss": 0.05976512283086777, + "loss_ce": 0.00507761724293232, + "loss_iou": 0.22265625, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 501840028, + "step": 5175 + }, + { + "epoch": 0.5060617911615174, + "grad_norm": 6.489901456205554, + "learning_rate": 5e-05, + "loss": 0.0571, + "num_input_tokens_seen": 501937652, + "step": 5176 + }, + { + "epoch": 0.5060617911615174, + "loss": 0.07338231801986694, + "loss_ce": 0.0027646415401250124, + "loss_iou": 0.34765625, + "loss_num": 0.01409912109375, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 501937652, + "step": 5176 + }, + { + "epoch": 0.5061595619867032, + "grad_norm": 8.424330045449015, + "learning_rate": 5e-05, + "loss": 0.0537, + "num_input_tokens_seen": 502035008, + "step": 5177 + }, + { + "epoch": 0.5061595619867032, + "loss": 0.03943559527397156, + "loss_ce": 0.007888050749897957, + "loss_iou": 0.255859375, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 502035008, + "step": 5177 + }, + { + "epoch": 0.5062573328118889, + "grad_norm": 8.658914343876846, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 502132056, + "step": 5178 + }, + { + "epoch": 0.5062573328118889, + "loss": 0.08899956941604614, + "loss_ce": 0.004236993379890919, + "loss_iou": 0.419921875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 502132056, + "step": 5178 + }, + { + "epoch": 0.5063551036370747, + "grad_norm": 7.218200723432543, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 502229416, + "step": 5179 + }, + { + "epoch": 0.5063551036370747, + "loss": 0.06185253709554672, + "loss_ce": 0.003258787328377366, + "loss_iou": 0.27734375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 502229416, + "step": 5179 + }, + { + "epoch": 0.5064528744622605, + "grad_norm": 10.938592049052211, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 502326280, + "step": 5180 + }, + { + "epoch": 0.5064528744622605, + "loss": 0.055335234850645065, + "loss_ce": 0.0039741480723023415, + "loss_iou": 0.251953125, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 502326280, + "step": 5180 + }, + { + "epoch": 0.5065506452874462, + "grad_norm": 20.425145892301128, + "learning_rate": 5e-05, + "loss": 0.0996, + "num_input_tokens_seen": 502423384, + "step": 5181 + }, + { + "epoch": 0.5065506452874462, + "loss": 0.10953593254089355, + "loss_ce": 0.0068137748166918755, + "loss_iou": 0.259765625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 502423384, + "step": 5181 + }, + { + "epoch": 0.506648416112632, + "grad_norm": 7.971025762677995, + "learning_rate": 5e-05, + "loss": 0.1039, + "num_input_tokens_seen": 502520924, + "step": 5182 + }, + { + "epoch": 0.506648416112632, + "loss": 0.1118754893541336, + "loss_ce": 0.004972411319613457, + "loss_iou": 0.248046875, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 502520924, + "step": 5182 + }, + { + "epoch": 0.5067461869378177, + "grad_norm": 4.038858260836607, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 502618372, + "step": 5183 + }, + { + "epoch": 0.5067461869378177, + "loss": 0.08324387669563293, + "loss_ce": 0.0014338790206238627, + "loss_iou": 0.30859375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 502618372, + "step": 5183 + }, + { + "epoch": 0.5068439577630035, + "grad_norm": 54.59471917526796, + "learning_rate": 5e-05, + "loss": 0.0925, + "num_input_tokens_seen": 502715644, + "step": 5184 + }, + { + "epoch": 0.5068439577630035, + "loss": 0.06592166423797607, + "loss_ce": 0.004245638847351074, + "loss_iou": 0.369140625, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 502715644, + "step": 5184 + }, + { + "epoch": 0.5069417285881893, + "grad_norm": 93.00619552759281, + "learning_rate": 5e-05, + "loss": 0.1023, + "num_input_tokens_seen": 502812736, + "step": 5185 + }, + { + "epoch": 0.5069417285881893, + "loss": 0.1029716208577156, + "loss_ce": 0.005467966664582491, + "loss_iou": 0.376953125, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 502812736, + "step": 5185 + }, + { + "epoch": 0.507039499413375, + "grad_norm": 24.045711389528606, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 502909476, + "step": 5186 + }, + { + "epoch": 0.507039499413375, + "loss": 0.10419633984565735, + "loss_ce": 0.003106866031885147, + "loss_iou": 0.359375, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 502909476, + "step": 5186 + }, + { + "epoch": 0.5071372702385608, + "grad_norm": 4.728560352665818, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 503006124, + "step": 5187 + }, + { + "epoch": 0.5071372702385608, + "loss": 0.07370029389858246, + "loss_ce": 0.002296787453815341, + "loss_iou": 0.181640625, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 503006124, + "step": 5187 + }, + { + "epoch": 0.5072350410637466, + "grad_norm": 2.9821842755191477, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 503103164, + "step": 5188 + }, + { + "epoch": 0.5072350410637466, + "loss": 0.08400921523571014, + "loss_ce": 0.009180111810564995, + "loss_iou": 0.357421875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 503103164, + "step": 5188 + }, + { + "epoch": 0.5073328118889323, + "grad_norm": 13.19489787260254, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 503200076, + "step": 5189 + }, + { + "epoch": 0.5073328118889323, + "loss": 0.05663473159074783, + "loss_ce": 0.0055406782776117325, + "loss_iou": 0.34765625, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 503200076, + "step": 5189 + }, + { + "epoch": 0.5074305827141181, + "grad_norm": 3.951776917086508, + "learning_rate": 5e-05, + "loss": 0.0968, + "num_input_tokens_seen": 503297400, + "step": 5190 + }, + { + "epoch": 0.5074305827141181, + "loss": 0.10046854615211487, + "loss_ce": 0.004307655617594719, + "loss_iou": 0.318359375, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 503297400, + "step": 5190 + }, + { + "epoch": 0.5075283535393039, + "grad_norm": 7.153478685992315, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 503395288, + "step": 5191 + }, + { + "epoch": 0.5075283535393039, + "loss": 0.0604475773870945, + "loss_ce": 0.008094673976302147, + "loss_iou": 0.35546875, + "loss_num": 0.010498046875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 503395288, + "step": 5191 + }, + { + "epoch": 0.5076261243644896, + "grad_norm": 7.787497998315415, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 503492364, + "step": 5192 + }, + { + "epoch": 0.5076261243644896, + "loss": 0.0805261954665184, + "loss_ce": 0.00568946311250329, + "loss_iou": 0.287109375, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 503492364, + "step": 5192 + }, + { + "epoch": 0.5077238951896754, + "grad_norm": 4.6132012150763035, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 503589128, + "step": 5193 + }, + { + "epoch": 0.5077238951896754, + "loss": 0.13654614984989166, + "loss_ce": 0.00791456364095211, + "loss_iou": 0.384765625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 503589128, + "step": 5193 + }, + { + "epoch": 0.5078216660148611, + "grad_norm": 3.061958491040834, + "learning_rate": 5e-05, + "loss": 0.1205, + "num_input_tokens_seen": 503686840, + "step": 5194 + }, + { + "epoch": 0.5078216660148611, + "loss": 0.12151169776916504, + "loss_ce": 0.012197734788060188, + "loss_iou": 0.3046875, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 503686840, + "step": 5194 + }, + { + "epoch": 0.5079194368400469, + "grad_norm": 4.107162159994294, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 503784844, + "step": 5195 + }, + { + "epoch": 0.5079194368400469, + "loss": 0.06744667142629623, + "loss_ce": 0.002612075302749872, + "loss_iou": 0.265625, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 503784844, + "step": 5195 + }, + { + "epoch": 0.5080172076652327, + "grad_norm": 8.855047684555215, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 503882492, + "step": 5196 + }, + { + "epoch": 0.5080172076652327, + "loss": 0.07619623839855194, + "loss_ce": 0.006120249163359404, + "loss_iou": 0.3125, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 503882492, + "step": 5196 + }, + { + "epoch": 0.5081149784904184, + "grad_norm": 5.356521111134933, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 503979124, + "step": 5197 + }, + { + "epoch": 0.5081149784904184, + "loss": 0.10175773501396179, + "loss_ce": 0.0018737006466835737, + "loss_iou": 0.328125, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 503979124, + "step": 5197 + }, + { + "epoch": 0.5082127493156042, + "grad_norm": 20.879803179644224, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 504075792, + "step": 5198 + }, + { + "epoch": 0.5082127493156042, + "loss": 0.09479457139968872, + "loss_ce": 0.00841456837952137, + "loss_iou": 0.25390625, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 504075792, + "step": 5198 + }, + { + "epoch": 0.50831052014079, + "grad_norm": 4.262093559814807, + "learning_rate": 5e-05, + "loss": 0.0531, + "num_input_tokens_seen": 504172416, + "step": 5199 + }, + { + "epoch": 0.50831052014079, + "loss": 0.06241903454065323, + "loss_ce": 0.005549530498683453, + "loss_iou": 0.251953125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 504172416, + "step": 5199 + }, + { + "epoch": 0.5084082909659757, + "grad_norm": 6.283112721991414, + "learning_rate": 5e-05, + "loss": 0.1097, + "num_input_tokens_seen": 504269940, + "step": 5200 + }, + { + "epoch": 0.5084082909659757, + "loss": 0.1368921995162964, + "loss_ce": 0.007665516342967749, + "loss_iou": 0.2734375, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 504269940, + "step": 5200 + }, + { + "epoch": 0.5085060617911615, + "grad_norm": 8.985882217837572, + "learning_rate": 5e-05, + "loss": 0.1072, + "num_input_tokens_seen": 504367080, + "step": 5201 + }, + { + "epoch": 0.5085060617911615, + "loss": 0.15009655058383942, + "loss_ce": 0.005000731907784939, + "loss_iou": 0.259765625, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 504367080, + "step": 5201 + }, + { + "epoch": 0.5086038326163472, + "grad_norm": 5.5534304661706, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 504463464, + "step": 5202 + }, + { + "epoch": 0.5086038326163472, + "loss": 0.09869465231895447, + "loss_ce": 0.007111395709216595, + "loss_iou": 0.259765625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 504463464, + "step": 5202 + }, + { + "epoch": 0.508701603441533, + "grad_norm": 5.071639153794999, + "learning_rate": 5e-05, + "loss": 0.1121, + "num_input_tokens_seen": 504560204, + "step": 5203 + }, + { + "epoch": 0.508701603441533, + "loss": 0.1143893301486969, + "loss_ce": 0.007608318701386452, + "loss_iou": 0.328125, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 504560204, + "step": 5203 + }, + { + "epoch": 0.5087993742667188, + "grad_norm": 4.610934273751694, + "learning_rate": 5e-05, + "loss": 0.0967, + "num_input_tokens_seen": 504657804, + "step": 5204 + }, + { + "epoch": 0.5087993742667188, + "loss": 0.10730551183223724, + "loss_ce": 0.007268895395100117, + "loss_iou": 0.31640625, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 504657804, + "step": 5204 + }, + { + "epoch": 0.5088971450919045, + "grad_norm": 4.563848546460817, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 504754340, + "step": 5205 + }, + { + "epoch": 0.5088971450919045, + "loss": 0.0842578336596489, + "loss_ce": 0.005270713474601507, + "loss_iou": 0.3359375, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 504754340, + "step": 5205 + }, + { + "epoch": 0.5089949159170903, + "grad_norm": 4.1994634551381305, + "learning_rate": 5e-05, + "loss": 0.0708, + "num_input_tokens_seen": 504850800, + "step": 5206 + }, + { + "epoch": 0.5089949159170903, + "loss": 0.09713698923587799, + "loss_ce": 0.007430569268763065, + "loss_iou": 0.3125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 504850800, + "step": 5206 + }, + { + "epoch": 0.5090926867422761, + "grad_norm": 2.7912375570052474, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 504948300, + "step": 5207 + }, + { + "epoch": 0.5090926867422761, + "loss": 0.07116645574569702, + "loss_ce": 0.004012522287666798, + "loss_iou": 0.310546875, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 504948300, + "step": 5207 + }, + { + "epoch": 0.5091904575674618, + "grad_norm": 3.8923164627614333, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 505045132, + "step": 5208 + }, + { + "epoch": 0.5091904575674618, + "loss": 0.07577399164438248, + "loss_ce": 0.004713810980319977, + "loss_iou": 0.205078125, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 505045132, + "step": 5208 + }, + { + "epoch": 0.5092882283926476, + "grad_norm": 2.5648867464024234, + "learning_rate": 5e-05, + "loss": 0.0581, + "num_input_tokens_seen": 505142320, + "step": 5209 + }, + { + "epoch": 0.5092882283926476, + "loss": 0.06739870458841324, + "loss_ce": 0.0032965331338346004, + "loss_iou": 0.330078125, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 505142320, + "step": 5209 + }, + { + "epoch": 0.5093859992178335, + "grad_norm": 7.428116875310789, + "learning_rate": 5e-05, + "loss": 0.0681, + "num_input_tokens_seen": 505239880, + "step": 5210 + }, + { + "epoch": 0.5093859992178335, + "loss": 0.07862462103366852, + "loss_ce": 0.0024222303181886673, + "loss_iou": 0.359375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 505239880, + "step": 5210 + }, + { + "epoch": 0.5094837700430191, + "grad_norm": 15.91877629792405, + "learning_rate": 5e-05, + "loss": 0.064, + "num_input_tokens_seen": 505336576, + "step": 5211 + }, + { + "epoch": 0.5094837700430191, + "loss": 0.04364960640668869, + "loss_ce": 0.005495005752891302, + "loss_iou": 0.294921875, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 505336576, + "step": 5211 + }, + { + "epoch": 0.509581540868205, + "grad_norm": 11.153003819705063, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 505434468, + "step": 5212 + }, + { + "epoch": 0.509581540868205, + "loss": 0.062233660370111465, + "loss_ce": 0.0044333660043776035, + "loss_iou": 0.314453125, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 505434468, + "step": 5212 + }, + { + "epoch": 0.5096793116933906, + "grad_norm": 9.950903791841863, + "learning_rate": 5e-05, + "loss": 0.1046, + "num_input_tokens_seen": 505531312, + "step": 5213 + }, + { + "epoch": 0.5096793116933906, + "loss": 0.1039508730173111, + "loss_ce": 0.011330022476613522, + "loss_iou": 0.3125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 505531312, + "step": 5213 + }, + { + "epoch": 0.5097770825185765, + "grad_norm": 8.261653568589072, + "learning_rate": 5e-05, + "loss": 0.1061, + "num_input_tokens_seen": 505628484, + "step": 5214 + }, + { + "epoch": 0.5097770825185765, + "loss": 0.07431057840585709, + "loss_ce": 0.003586090635508299, + "loss_iou": 0.376953125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 505628484, + "step": 5214 + }, + { + "epoch": 0.5098748533437623, + "grad_norm": 1.840372462588455, + "learning_rate": 5e-05, + "loss": 0.0703, + "num_input_tokens_seen": 505726512, + "step": 5215 + }, + { + "epoch": 0.5098748533437623, + "loss": 0.08997360616922379, + "loss_ce": 0.010307468473911285, + "loss_iou": 0.30859375, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 505726512, + "step": 5215 + }, + { + "epoch": 0.509972624168948, + "grad_norm": 3.0605959528981557, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 505823392, + "step": 5216 + }, + { + "epoch": 0.509972624168948, + "loss": 0.05958160012960434, + "loss_ce": 0.0028675447683781385, + "loss_iou": 0.1904296875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 505823392, + "step": 5216 + }, + { + "epoch": 0.5100703949941338, + "grad_norm": 3.0354382161017415, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 505920436, + "step": 5217 + }, + { + "epoch": 0.5100703949941338, + "loss": 0.06003227457404137, + "loss_ce": 0.0012935663107782602, + "loss_iou": 0.271484375, + "loss_num": 0.01171875, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 505920436, + "step": 5217 + }, + { + "epoch": 0.5101681658193196, + "grad_norm": 6.52995973324014, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 506017372, + "step": 5218 + }, + { + "epoch": 0.5101681658193196, + "loss": 0.07017552852630615, + "loss_ce": 0.007263540290296078, + "loss_iou": 0.310546875, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 506017372, + "step": 5218 + }, + { + "epoch": 0.5102659366445053, + "grad_norm": 15.898666699047613, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 506114596, + "step": 5219 + }, + { + "epoch": 0.5102659366445053, + "loss": 0.09534116834402084, + "loss_ce": 0.005829302594065666, + "loss_iou": 0.25390625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 506114596, + "step": 5219 + }, + { + "epoch": 0.5103637074696911, + "grad_norm": 5.661490427361347, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 506210520, + "step": 5220 + }, + { + "epoch": 0.5103637074696911, + "loss": 0.04317742586135864, + "loss_ce": 0.002737822476774454, + "loss_iou": 0.1865234375, + "loss_num": 0.008056640625, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 506210520, + "step": 5220 + }, + { + "epoch": 0.5104614782948768, + "grad_norm": 4.221378217075456, + "learning_rate": 5e-05, + "loss": 0.1278, + "num_input_tokens_seen": 506307512, + "step": 5221 + }, + { + "epoch": 0.5104614782948768, + "loss": 0.11130562424659729, + "loss_ce": 0.012709050439298153, + "loss_iou": 0.189453125, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 506307512, + "step": 5221 + }, + { + "epoch": 0.5105592491200626, + "grad_norm": 12.168617488974675, + "learning_rate": 5e-05, + "loss": 0.0754, + "num_input_tokens_seen": 506404408, + "step": 5222 + }, + { + "epoch": 0.5105592491200626, + "loss": 0.07970073074102402, + "loss_ce": 0.005474353674799204, + "loss_iou": 0.294921875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 506404408, + "step": 5222 + }, + { + "epoch": 0.5106570199452484, + "grad_norm": 18.344337351376254, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 506501596, + "step": 5223 + }, + { + "epoch": 0.5106570199452484, + "loss": 0.12527740001678467, + "loss_ce": 0.005846850108355284, + "loss_iou": 0.35546875, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 506501596, + "step": 5223 + }, + { + "epoch": 0.5107547907704341, + "grad_norm": 16.62871117157659, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 506598740, + "step": 5224 + }, + { + "epoch": 0.5107547907704341, + "loss": 0.06537513434886932, + "loss_ce": 0.00400428706780076, + "loss_iou": 0.33984375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 506598740, + "step": 5224 + }, + { + "epoch": 0.5108525615956199, + "grad_norm": 4.96358508089818, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 506696032, + "step": 5225 + }, + { + "epoch": 0.5108525615956199, + "loss": 0.07227674126625061, + "loss_ce": 0.006496103946119547, + "loss_iou": 0.326171875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 506696032, + "step": 5225 + }, + { + "epoch": 0.5109503324208057, + "grad_norm": 3.9124668893371517, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 506792940, + "step": 5226 + }, + { + "epoch": 0.5109503324208057, + "loss": 0.08722775429487228, + "loss_ce": 0.008141448721289635, + "loss_iou": 0.333984375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 506792940, + "step": 5226 + }, + { + "epoch": 0.5110481032459914, + "grad_norm": 8.848429506428769, + "learning_rate": 5e-05, + "loss": 0.0983, + "num_input_tokens_seen": 506890416, + "step": 5227 + }, + { + "epoch": 0.5110481032459914, + "loss": 0.09129379689693451, + "loss_ce": 0.003494719974696636, + "loss_iou": 0.4609375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 506890416, + "step": 5227 + }, + { + "epoch": 0.5111458740711772, + "grad_norm": 8.778842596979633, + "learning_rate": 5e-05, + "loss": 0.104, + "num_input_tokens_seen": 506988504, + "step": 5228 + }, + { + "epoch": 0.5111458740711772, + "loss": 0.09925617277622223, + "loss_ce": 0.003980291076004505, + "loss_iou": 0.375, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 506988504, + "step": 5228 + }, + { + "epoch": 0.5112436448963629, + "grad_norm": 8.31562449002375, + "learning_rate": 5e-05, + "loss": 0.0575, + "num_input_tokens_seen": 507085996, + "step": 5229 + }, + { + "epoch": 0.5112436448963629, + "loss": 0.07611922919750214, + "loss_ce": 0.0030601443722844124, + "loss_iou": 0.396484375, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 507085996, + "step": 5229 + }, + { + "epoch": 0.5113414157215487, + "grad_norm": 7.316728665175361, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 507183108, + "step": 5230 + }, + { + "epoch": 0.5113414157215487, + "loss": 0.09407678246498108, + "loss_ce": 0.0067965141497552395, + "loss_iou": 0.453125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 507183108, + "step": 5230 + }, + { + "epoch": 0.5114391865467345, + "grad_norm": 14.420615490907343, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 507280432, + "step": 5231 + }, + { + "epoch": 0.5114391865467345, + "loss": 0.08763455599546432, + "loss_ce": 0.0014681769534945488, + "loss_iou": 0.349609375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 507280432, + "step": 5231 + }, + { + "epoch": 0.5115369573719202, + "grad_norm": 5.557904176230753, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 507377556, + "step": 5232 + }, + { + "epoch": 0.5115369573719202, + "loss": 0.07823137938976288, + "loss_ce": 0.005096000619232655, + "loss_iou": 0.25, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 507377556, + "step": 5232 + }, + { + "epoch": 0.511634728197106, + "grad_norm": 3.632482774110592, + "learning_rate": 5e-05, + "loss": 0.0919, + "num_input_tokens_seen": 507474620, + "step": 5233 + }, + { + "epoch": 0.511634728197106, + "loss": 0.0905895084142685, + "loss_ce": 0.007657989393919706, + "loss_iou": 0.302734375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 507474620, + "step": 5233 + }, + { + "epoch": 0.5117324990222918, + "grad_norm": 1.978502821085464, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 507572120, + "step": 5234 + }, + { + "epoch": 0.5117324990222918, + "loss": 0.05578424781560898, + "loss_ce": 0.0043545011430978775, + "loss_iou": 0.291015625, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 507572120, + "step": 5234 + }, + { + "epoch": 0.5118302698474775, + "grad_norm": 13.625402061099798, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 507670148, + "step": 5235 + }, + { + "epoch": 0.5118302698474775, + "loss": 0.1018947958946228, + "loss_ce": 0.005108301527798176, + "loss_iou": 0.259765625, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 507670148, + "step": 5235 + }, + { + "epoch": 0.5119280406726633, + "grad_norm": 5.285108860667298, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 507766224, + "step": 5236 + }, + { + "epoch": 0.5119280406726633, + "loss": 0.06718385219573975, + "loss_ce": 0.006148692686110735, + "loss_iou": 0.220703125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 507766224, + "step": 5236 + }, + { + "epoch": 0.5120258114978491, + "grad_norm": 14.895979847020476, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 507862372, + "step": 5237 + }, + { + "epoch": 0.5120258114978491, + "loss": 0.06203148514032364, + "loss_ce": 0.0031096728052943945, + "loss_iou": 0.31640625, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 507862372, + "step": 5237 + }, + { + "epoch": 0.5121235823230348, + "grad_norm": 3.488442950741973, + "learning_rate": 5e-05, + "loss": 0.0914, + "num_input_tokens_seen": 507958828, + "step": 5238 + }, + { + "epoch": 0.5121235823230348, + "loss": 0.11396878957748413, + "loss_ce": 0.0051888832822442055, + "loss_iou": 0.341796875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 507958828, + "step": 5238 + }, + { + "epoch": 0.5122213531482206, + "grad_norm": 14.210043763148175, + "learning_rate": 5e-05, + "loss": 0.0969, + "num_input_tokens_seen": 508055776, + "step": 5239 + }, + { + "epoch": 0.5122213531482206, + "loss": 0.11515839397907257, + "loss_ce": 0.0089419549331069, + "loss_iou": 0.240234375, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 508055776, + "step": 5239 + }, + { + "epoch": 0.5123191239734063, + "grad_norm": 5.999268073367252, + "learning_rate": 5e-05, + "loss": 0.0556, + "num_input_tokens_seen": 508152756, + "step": 5240 + }, + { + "epoch": 0.5123191239734063, + "loss": 0.06219250336289406, + "loss_ce": 0.004987305495887995, + "loss_iou": 0.265625, + "loss_num": 0.011474609375, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 508152756, + "step": 5240 + }, + { + "epoch": 0.5124168947985921, + "grad_norm": 3.6044803982584157, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 508250164, + "step": 5241 + }, + { + "epoch": 0.5124168947985921, + "loss": 0.06774906814098358, + "loss_ce": 0.004699748009443283, + "loss_iou": 0.318359375, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 508250164, + "step": 5241 + }, + { + "epoch": 0.5125146656237779, + "grad_norm": 13.549564619868173, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 508346276, + "step": 5242 + }, + { + "epoch": 0.5125146656237779, + "loss": 0.07756085693836212, + "loss_ce": 0.004608583636581898, + "loss_iou": 0.2314453125, + "loss_num": 0.0146484375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 508346276, + "step": 5242 + }, + { + "epoch": 0.5126124364489636, + "grad_norm": 2.6233126248734906, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 508442400, + "step": 5243 + }, + { + "epoch": 0.5126124364489636, + "loss": 0.0745459571480751, + "loss_ce": 0.004904843866825104, + "loss_iou": 0.25, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 508442400, + "step": 5243 + }, + { + "epoch": 0.5127102072741494, + "grad_norm": 3.0382166180502503, + "learning_rate": 5e-05, + "loss": 0.0695, + "num_input_tokens_seen": 508540052, + "step": 5244 + }, + { + "epoch": 0.5127102072741494, + "loss": 0.06718981266021729, + "loss_ce": 0.005132322199642658, + "loss_iou": 0.330078125, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 508540052, + "step": 5244 + }, + { + "epoch": 0.5128079780993352, + "grad_norm": 5.141072335741019, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 508636512, + "step": 5245 + }, + { + "epoch": 0.5128079780993352, + "loss": 0.10566452145576477, + "loss_ce": 0.004071504808962345, + "loss_iou": 0.345703125, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 508636512, + "step": 5245 + }, + { + "epoch": 0.5129057489245209, + "grad_norm": 3.7273397966032507, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 508733144, + "step": 5246 + }, + { + "epoch": 0.5129057489245209, + "loss": 0.09517434239387512, + "loss_ce": 0.006352927535772324, + "loss_iou": 0.322265625, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 508733144, + "step": 5246 + }, + { + "epoch": 0.5130035197497067, + "grad_norm": 7.207135560139866, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 508830288, + "step": 5247 + }, + { + "epoch": 0.5130035197497067, + "loss": 0.05367927998304367, + "loss_ce": 0.0023029360454529524, + "loss_iou": 0.357421875, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 508830288, + "step": 5247 + }, + { + "epoch": 0.5131012905748924, + "grad_norm": 2.9117710145016704, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 508926916, + "step": 5248 + }, + { + "epoch": 0.5131012905748924, + "loss": 0.04764006286859512, + "loss_ce": 0.002199390670284629, + "loss_iou": 0.259765625, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 508926916, + "step": 5248 + }, + { + "epoch": 0.5131990614000782, + "grad_norm": 6.460232287919567, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 509024420, + "step": 5249 + }, + { + "epoch": 0.5131990614000782, + "loss": 0.0953645333647728, + "loss_ce": 0.0028047191444784403, + "loss_iou": 0.283203125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 509024420, + "step": 5249 + }, + { + "epoch": 0.513296832225264, + "grad_norm": 6.133528908732629, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 509121892, + "step": 5250 + }, + { + "epoch": 0.513296832225264, + "eval_seeclick_CIoU": 0.5216715931892395, + "eval_seeclick_GIoU": 0.5244563519954681, + "eval_seeclick_IoU": 0.5602417588233948, + "eval_seeclick_MAE_all": 0.06375483982264996, + "eval_seeclick_MAE_h": 0.02759898453950882, + "eval_seeclick_MAE_w": 0.08583197370171547, + "eval_seeclick_MAE_x": 0.11062692478299141, + "eval_seeclick_MAE_y": 0.0309614771977067, + "eval_seeclick_NUM_probability": 0.9999909996986389, + "eval_seeclick_inside_bbox": 0.8295454680919647, + "eval_seeclick_loss": 0.27219516038894653, + "eval_seeclick_loss_ce": 0.009845985565334558, + "eval_seeclick_loss_iou": 0.45556640625, + "eval_seeclick_loss_num": 0.05446624755859375, + "eval_seeclick_loss_xval": 0.2723388671875, + "eval_seeclick_runtime": 75.6821, + "eval_seeclick_samples_per_second": 0.568, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 509121892, + "step": 5250 + }, + { + "epoch": 0.513296832225264, + "eval_icons_CIoU": 0.6068835854530334, + "eval_icons_GIoU": 0.6008056700229645, + "eval_icons_IoU": 0.6555752456188202, + "eval_icons_MAE_all": 0.07777822017669678, + "eval_icons_MAE_h": 0.09802135452628136, + "eval_icons_MAE_w": 0.05938622169196606, + "eval_icons_MAE_x": 0.06120220199227333, + "eval_icons_MAE_y": 0.09250310435891151, + "eval_icons_NUM_probability": 0.9999848008155823, + "eval_icons_inside_bbox": 0.7517361044883728, + "eval_icons_loss": 0.22129210829734802, + "eval_icons_loss_ce": 6.5131102928717155e-06, + "eval_icons_loss_iou": 0.44293212890625, + "eval_icons_loss_num": 0.04496002197265625, + "eval_icons_loss_xval": 0.22479248046875, + "eval_icons_runtime": 85.8029, + "eval_icons_samples_per_second": 0.583, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 509121892, + "step": 5250 + }, + { + "epoch": 0.513296832225264, + "eval_screenspot_CIoU": 0.3458736042181651, + "eval_screenspot_GIoU": 0.33803650736808777, + "eval_screenspot_IoU": 0.43349647521972656, + "eval_screenspot_MAE_all": 0.1475334291656812, + "eval_screenspot_MAE_h": 0.09873014440139134, + "eval_screenspot_MAE_w": 0.21546906729539236, + "eval_screenspot_MAE_x": 0.18781403948863348, + "eval_screenspot_MAE_y": 0.08812044809261958, + "eval_screenspot_NUM_probability": 0.9999710917472839, + "eval_screenspot_inside_bbox": 0.671666661898295, + "eval_screenspot_loss": 0.5239847898483276, + "eval_screenspot_loss_ce": 0.016176891202727955, + "eval_screenspot_loss_iou": 0.3808186848958333, + "eval_screenspot_loss_num": 0.10322062174479167, + "eval_screenspot_loss_xval": 0.5159505208333334, + "eval_screenspot_runtime": 147.9777, + "eval_screenspot_samples_per_second": 0.601, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 509121892, + "step": 5250 + }, + { + "epoch": 0.513296832225264, + "eval_compot_CIoU": 0.45616917312145233, + "eval_compot_GIoU": 0.45524781942367554, + "eval_compot_IoU": 0.5177817344665527, + "eval_compot_MAE_all": 0.0920683965086937, + "eval_compot_MAE_h": 0.07402987778186798, + "eval_compot_MAE_w": 0.11094792559742928, + "eval_compot_MAE_x": 0.10659899935126305, + "eval_compot_MAE_y": 0.07669677585363388, + "eval_compot_NUM_probability": 0.9999741911888123, + "eval_compot_inside_bbox": 0.6805555522441864, + "eval_compot_loss": 0.29247379302978516, + "eval_compot_loss_ce": 0.020568478852510452, + "eval_compot_loss_iou": 0.468505859375, + "eval_compot_loss_num": 0.048717498779296875, + "eval_compot_loss_xval": 0.2433929443359375, + "eval_compot_runtime": 88.3449, + "eval_compot_samples_per_second": 0.566, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 509121892, + "step": 5250 + }, + { + "epoch": 0.513296832225264, + "loss": 0.25136110186576843, + "loss_ce": 0.019671648740768433, + "loss_iou": 0.482421875, + "loss_num": 0.04638671875, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 509121892, + "step": 5250 + }, + { + "epoch": 0.5133946030504497, + "grad_norm": 15.044436574017693, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 509218292, + "step": 5251 + }, + { + "epoch": 0.5133946030504497, + "loss": 0.13458950817584991, + "loss_ce": 0.007550553418695927, + "loss_iou": 0.251953125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 509218292, + "step": 5251 + }, + { + "epoch": 0.5134923738756355, + "grad_norm": 6.48935368693362, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 509315040, + "step": 5252 + }, + { + "epoch": 0.5134923738756355, + "loss": 0.0982271283864975, + "loss_ce": 0.0073000043630599976, + "loss_iou": 0.267578125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 509315040, + "step": 5252 + }, + { + "epoch": 0.5135901447008213, + "grad_norm": 4.236220031917802, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 509411948, + "step": 5253 + }, + { + "epoch": 0.5135901447008213, + "loss": 0.08429069817066193, + "loss_ce": 0.0043956805020570755, + "loss_iou": 0.357421875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 509411948, + "step": 5253 + }, + { + "epoch": 0.513687915526007, + "grad_norm": 4.381024020259551, + "learning_rate": 5e-05, + "loss": 0.0727, + "num_input_tokens_seen": 509508448, + "step": 5254 + }, + { + "epoch": 0.513687915526007, + "loss": 0.0765039473772049, + "loss_ce": 0.0033227959647774696, + "loss_iou": 0.302734375, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 509508448, + "step": 5254 + }, + { + "epoch": 0.5137856863511928, + "grad_norm": 12.696459979854183, + "learning_rate": 5e-05, + "loss": 0.1079, + "num_input_tokens_seen": 509605884, + "step": 5255 + }, + { + "epoch": 0.5137856863511928, + "loss": 0.07708723843097687, + "loss_ce": 0.004730060696601868, + "loss_iou": 0.3125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 509605884, + "step": 5255 + }, + { + "epoch": 0.5138834571763786, + "grad_norm": 15.314090806671521, + "learning_rate": 5e-05, + "loss": 0.0646, + "num_input_tokens_seen": 509702976, + "step": 5256 + }, + { + "epoch": 0.5138834571763786, + "loss": 0.08400443196296692, + "loss_ce": 0.006489785388112068, + "loss_iou": 0.287109375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 509702976, + "step": 5256 + }, + { + "epoch": 0.5139812280015643, + "grad_norm": 5.998043978314225, + "learning_rate": 5e-05, + "loss": 0.1006, + "num_input_tokens_seen": 509799744, + "step": 5257 + }, + { + "epoch": 0.5139812280015643, + "loss": 0.15359801054000854, + "loss_ce": 0.01208801381289959, + "loss_iou": 0.41796875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 509799744, + "step": 5257 + }, + { + "epoch": 0.5140789988267501, + "grad_norm": 13.486498986623255, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 509897128, + "step": 5258 + }, + { + "epoch": 0.5140789988267501, + "loss": 0.07516366988420486, + "loss_ce": 0.005156347993761301, + "loss_iou": 0.296875, + "loss_num": 0.0140380859375, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 509897128, + "step": 5258 + }, + { + "epoch": 0.5141767696519358, + "grad_norm": 11.51131466712009, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 509993664, + "step": 5259 + }, + { + "epoch": 0.5141767696519358, + "loss": 0.0715274065732956, + "loss_ce": 0.006509704515337944, + "loss_iou": 0.2099609375, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 509993664, + "step": 5259 + }, + { + "epoch": 0.5142745404771216, + "grad_norm": 32.133738133310715, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 510090348, + "step": 5260 + }, + { + "epoch": 0.5142745404771216, + "loss": 0.0526575967669487, + "loss_ce": 0.0029292041435837746, + "loss_iou": 0.2060546875, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 510090348, + "step": 5260 + }, + { + "epoch": 0.5143723113023074, + "grad_norm": 11.381082764675213, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 510187072, + "step": 5261 + }, + { + "epoch": 0.5143723113023074, + "loss": 0.10894882678985596, + "loss_ce": 0.003968358505517244, + "loss_iou": 0.39453125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 510187072, + "step": 5261 + }, + { + "epoch": 0.5144700821274931, + "grad_norm": 6.016567420734032, + "learning_rate": 5e-05, + "loss": 0.1086, + "num_input_tokens_seen": 510283948, + "step": 5262 + }, + { + "epoch": 0.5144700821274931, + "loss": 0.1197362095117569, + "loss_ce": 0.004608641378581524, + "loss_iou": 0.2890625, + "loss_num": 0.02294921875, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 510283948, + "step": 5262 + }, + { + "epoch": 0.5145678529526789, + "grad_norm": 15.80787057691825, + "learning_rate": 5e-05, + "loss": 0.1023, + "num_input_tokens_seen": 510381360, + "step": 5263 + }, + { + "epoch": 0.5145678529526789, + "loss": 0.10401013493537903, + "loss_ce": 0.009298838675022125, + "loss_iou": 0.302734375, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 510381360, + "step": 5263 + }, + { + "epoch": 0.5146656237778647, + "grad_norm": 8.154174925708228, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 510477816, + "step": 5264 + }, + { + "epoch": 0.5146656237778647, + "loss": 0.06662149727344513, + "loss_ce": 0.004312228877097368, + "loss_iou": 0.27734375, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 510477816, + "step": 5264 + }, + { + "epoch": 0.5147633946030504, + "grad_norm": 4.3525327218331835, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 510574888, + "step": 5265 + }, + { + "epoch": 0.5147633946030504, + "loss": 0.08217576146125793, + "loss_ce": 0.006705787032842636, + "loss_iou": 0.28515625, + "loss_num": 0.01507568359375, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 510574888, + "step": 5265 + }, + { + "epoch": 0.5148611654282362, + "grad_norm": 4.318136563717871, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 510672460, + "step": 5266 + }, + { + "epoch": 0.5148611654282362, + "loss": 0.0755348727107048, + "loss_ce": 0.0033302861265838146, + "loss_iou": 0.265625, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 510672460, + "step": 5266 + }, + { + "epoch": 0.5149589362534219, + "grad_norm": 7.453956911877228, + "learning_rate": 5e-05, + "loss": 0.1211, + "num_input_tokens_seen": 510769424, + "step": 5267 + }, + { + "epoch": 0.5149589362534219, + "loss": 0.09007187932729721, + "loss_ce": 0.0057823252864181995, + "loss_iou": 0.2353515625, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 510769424, + "step": 5267 + }, + { + "epoch": 0.5150567070786077, + "grad_norm": 11.623368515609059, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 510866348, + "step": 5268 + }, + { + "epoch": 0.5150567070786077, + "loss": 0.08218778669834137, + "loss_ce": 0.0042916699312627316, + "loss_iou": 0.25390625, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 510866348, + "step": 5268 + }, + { + "epoch": 0.5151544779037935, + "grad_norm": 12.519403613054697, + "learning_rate": 5e-05, + "loss": 0.0845, + "num_input_tokens_seen": 510963168, + "step": 5269 + }, + { + "epoch": 0.5151544779037935, + "loss": 0.07570037990808487, + "loss_ce": 0.00607452355325222, + "loss_iou": 0.259765625, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 510963168, + "step": 5269 + }, + { + "epoch": 0.5152522487289792, + "grad_norm": 9.542209890333156, + "learning_rate": 5e-05, + "loss": 0.0745, + "num_input_tokens_seen": 511060312, + "step": 5270 + }, + { + "epoch": 0.5152522487289792, + "loss": 0.08510305732488632, + "loss_ce": 0.00324728200212121, + "loss_iou": 0.369140625, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 511060312, + "step": 5270 + }, + { + "epoch": 0.515350019554165, + "grad_norm": 13.763925233134255, + "learning_rate": 5e-05, + "loss": 0.097, + "num_input_tokens_seen": 511157460, + "step": 5271 + }, + { + "epoch": 0.515350019554165, + "loss": 0.12489795684814453, + "loss_ce": 0.007390021346509457, + "loss_iou": 0.279296875, + "loss_num": 0.0234375, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 511157460, + "step": 5271 + }, + { + "epoch": 0.5154477903793508, + "grad_norm": 10.85640762944548, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 511254268, + "step": 5272 + }, + { + "epoch": 0.5154477903793508, + "loss": 0.08669725805521011, + "loss_ce": 0.003674189792945981, + "loss_iou": 0.326171875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 511254268, + "step": 5272 + }, + { + "epoch": 0.5155455612045365, + "grad_norm": 17.22899257662434, + "learning_rate": 5e-05, + "loss": 0.1018, + "num_input_tokens_seen": 511351856, + "step": 5273 + }, + { + "epoch": 0.5155455612045365, + "loss": 0.10867787897586823, + "loss_ce": 0.0029802394565194845, + "loss_iou": 0.298828125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 511351856, + "step": 5273 + }, + { + "epoch": 0.5156433320297223, + "grad_norm": 5.1244390348193845, + "learning_rate": 5e-05, + "loss": 0.0682, + "num_input_tokens_seen": 511448272, + "step": 5274 + }, + { + "epoch": 0.5156433320297223, + "loss": 0.07972544431686401, + "loss_ce": 0.005476176738739014, + "loss_iou": 0.37109375, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 511448272, + "step": 5274 + }, + { + "epoch": 0.515741102854908, + "grad_norm": 10.089287417698339, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 511545504, + "step": 5275 + }, + { + "epoch": 0.515741102854908, + "loss": 0.07620816677808762, + "loss_ce": 0.0018978624138981104, + "loss_iou": 0.390625, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 511545504, + "step": 5275 + }, + { + "epoch": 0.5158388736800938, + "grad_norm": 23.103125203584447, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 511642148, + "step": 5276 + }, + { + "epoch": 0.5158388736800938, + "loss": 0.07290637493133545, + "loss_ce": 0.007140992674976587, + "loss_iou": 0.33203125, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 511642148, + "step": 5276 + }, + { + "epoch": 0.5159366445052797, + "grad_norm": 7.911678717230961, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 511738864, + "step": 5277 + }, + { + "epoch": 0.5159366445052797, + "loss": 0.08411751687526703, + "loss_ce": 0.006297685205936432, + "loss_iou": 0.345703125, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 511738864, + "step": 5277 + }, + { + "epoch": 0.5160344153304653, + "grad_norm": 4.373597769398411, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 511835972, + "step": 5278 + }, + { + "epoch": 0.5160344153304653, + "loss": 0.06839703023433685, + "loss_ce": 0.0038599795661866665, + "loss_iou": 0.271484375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 511835972, + "step": 5278 + }, + { + "epoch": 0.5161321861556512, + "grad_norm": 7.80336113422961, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 511933268, + "step": 5279 + }, + { + "epoch": 0.5161321861556512, + "loss": 0.11126242578029633, + "loss_ce": 0.007365324534475803, + "loss_iou": 0.318359375, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 511933268, + "step": 5279 + }, + { + "epoch": 0.516229956980837, + "grad_norm": 2.8721958693013363, + "learning_rate": 5e-05, + "loss": 0.1189, + "num_input_tokens_seen": 512029624, + "step": 5280 + }, + { + "epoch": 0.516229956980837, + "loss": 0.15876543521881104, + "loss_ce": 0.012845644727349281, + "loss_iou": 0.232421875, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 512029624, + "step": 5280 + }, + { + "epoch": 0.5163277278060227, + "grad_norm": 2.3827087033045418, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 512126752, + "step": 5281 + }, + { + "epoch": 0.5163277278060227, + "loss": 0.06920447200536728, + "loss_ce": 0.004919189959764481, + "loss_iou": 0.357421875, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 512126752, + "step": 5281 + }, + { + "epoch": 0.5164254986312085, + "grad_norm": 5.255139819931553, + "learning_rate": 5e-05, + "loss": 0.0554, + "num_input_tokens_seen": 512223328, + "step": 5282 + }, + { + "epoch": 0.5164254986312085, + "loss": 0.029662571847438812, + "loss_ce": 0.0050806631334125996, + "loss_iou": 0.23046875, + "loss_num": 0.004913330078125, + "loss_xval": 0.0245361328125, + "num_input_tokens_seen": 512223328, + "step": 5282 + }, + { + "epoch": 0.5165232694563943, + "grad_norm": 10.502458587738905, + "learning_rate": 5e-05, + "loss": 0.092, + "num_input_tokens_seen": 512320516, + "step": 5283 + }, + { + "epoch": 0.5165232694563943, + "loss": 0.09505634754896164, + "loss_ce": 0.004083449952304363, + "loss_iou": 0.34375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 512320516, + "step": 5283 + }, + { + "epoch": 0.51662104028158, + "grad_norm": 8.429161532147203, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 512417424, + "step": 5284 + }, + { + "epoch": 0.51662104028158, + "loss": 0.05932911857962608, + "loss_ce": 0.0034285441506654024, + "loss_iou": 0.314453125, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 512417424, + "step": 5284 + }, + { + "epoch": 0.5167188111067658, + "grad_norm": 4.455603509413022, + "learning_rate": 5e-05, + "loss": 0.1096, + "num_input_tokens_seen": 512513424, + "step": 5285 + }, + { + "epoch": 0.5167188111067658, + "loss": 0.10284527391195297, + "loss_ce": 0.014832580462098122, + "loss_iou": 0.267578125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 512513424, + "step": 5285 + }, + { + "epoch": 0.5168165819319515, + "grad_norm": 18.41010938110723, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 512610888, + "step": 5286 + }, + { + "epoch": 0.5168165819319515, + "loss": 0.08522819727659225, + "loss_ce": 0.0043184650130569935, + "loss_iou": 0.33984375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 512610888, + "step": 5286 + }, + { + "epoch": 0.5169143527571373, + "grad_norm": 3.748720525710335, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 512708020, + "step": 5287 + }, + { + "epoch": 0.5169143527571373, + "loss": 0.07148788869380951, + "loss_ce": 0.005508885253220797, + "loss_iou": 0.35546875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 512708020, + "step": 5287 + }, + { + "epoch": 0.5170121235823231, + "grad_norm": 12.48746854829595, + "learning_rate": 5e-05, + "loss": 0.1394, + "num_input_tokens_seen": 512804956, + "step": 5288 + }, + { + "epoch": 0.5170121235823231, + "loss": 0.18292810022830963, + "loss_ce": 0.005498895421624184, + "loss_iou": 0.2275390625, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 512804956, + "step": 5288 + }, + { + "epoch": 0.5171098944075088, + "grad_norm": 5.49349712696351, + "learning_rate": 5e-05, + "loss": 0.0848, + "num_input_tokens_seen": 512901868, + "step": 5289 + }, + { + "epoch": 0.5171098944075088, + "loss": 0.11397475749254227, + "loss_ce": 0.002005763351917267, + "loss_iou": 0.22265625, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 512901868, + "step": 5289 + }, + { + "epoch": 0.5172076652326946, + "grad_norm": 5.574596972455475, + "learning_rate": 5e-05, + "loss": 0.0523, + "num_input_tokens_seen": 512998316, + "step": 5290 + }, + { + "epoch": 0.5172076652326946, + "loss": 0.05364726483821869, + "loss_ce": 0.004712328780442476, + "loss_iou": 0.306640625, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 512998316, + "step": 5290 + }, + { + "epoch": 0.5173054360578804, + "grad_norm": 4.40252878386799, + "learning_rate": 5e-05, + "loss": 0.0865, + "num_input_tokens_seen": 513095640, + "step": 5291 + }, + { + "epoch": 0.5173054360578804, + "loss": 0.12552247941493988, + "loss_ce": 0.006351341027766466, + "loss_iou": 0.287109375, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 513095640, + "step": 5291 + }, + { + "epoch": 0.5174032068830661, + "grad_norm": 2.7438096117682074, + "learning_rate": 5e-05, + "loss": 0.103, + "num_input_tokens_seen": 513192356, + "step": 5292 + }, + { + "epoch": 0.5174032068830661, + "loss": 0.0872737467288971, + "loss_ce": 0.004830511286854744, + "loss_iou": 0.287109375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 513192356, + "step": 5292 + }, + { + "epoch": 0.5175009777082519, + "grad_norm": 2.963431951167095, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 513289896, + "step": 5293 + }, + { + "epoch": 0.5175009777082519, + "loss": 0.09021991491317749, + "loss_ce": 0.0005516420933417976, + "loss_iou": 0.2890625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 513289896, + "step": 5293 + }, + { + "epoch": 0.5175987485334376, + "grad_norm": 4.139811872141738, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 513386468, + "step": 5294 + }, + { + "epoch": 0.5175987485334376, + "loss": 0.11184778809547424, + "loss_ce": 0.004151256289333105, + "loss_iou": 0.298828125, + "loss_num": 0.021484375, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 513386468, + "step": 5294 + }, + { + "epoch": 0.5176965193586234, + "grad_norm": 11.51557982727695, + "learning_rate": 5e-05, + "loss": 0.0877, + "num_input_tokens_seen": 513483904, + "step": 5295 + }, + { + "epoch": 0.5176965193586234, + "loss": 0.07304742187261581, + "loss_ce": 0.008441707119345665, + "loss_iou": 0.263671875, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 513483904, + "step": 5295 + }, + { + "epoch": 0.5177942901838092, + "grad_norm": 11.361178647162196, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 513580784, + "step": 5296 + }, + { + "epoch": 0.5177942901838092, + "loss": 0.09419377148151398, + "loss_ce": 0.00575382262468338, + "loss_iou": 0.365234375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 513580784, + "step": 5296 + }, + { + "epoch": 0.5178920610089949, + "grad_norm": 3.2291306583578843, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 513678104, + "step": 5297 + }, + { + "epoch": 0.5178920610089949, + "loss": 0.07920767366886139, + "loss_ce": 0.005187284201383591, + "loss_iou": 0.341796875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 513678104, + "step": 5297 + }, + { + "epoch": 0.5179898318341807, + "grad_norm": 2.5148079013285938, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 513774956, + "step": 5298 + }, + { + "epoch": 0.5179898318341807, + "loss": 0.06485278904438019, + "loss_ce": 0.005992013495415449, + "loss_iou": 0.25390625, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 513774956, + "step": 5298 + }, + { + "epoch": 0.5180876026593665, + "grad_norm": 4.146415568453503, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 513872216, + "step": 5299 + }, + { + "epoch": 0.5180876026593665, + "loss": 0.07501404732465744, + "loss_ce": 0.011522220447659492, + "loss_iou": 0.3046875, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 513872216, + "step": 5299 + }, + { + "epoch": 0.5181853734845522, + "grad_norm": 8.876748425320695, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 513968488, + "step": 5300 + }, + { + "epoch": 0.5181853734845522, + "loss": 0.09524316340684891, + "loss_ce": 0.005838107317686081, + "loss_iou": 0.26171875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 513968488, + "step": 5300 + }, + { + "epoch": 0.518283144309738, + "grad_norm": 11.820322754763977, + "learning_rate": 5e-05, + "loss": 0.0646, + "num_input_tokens_seen": 514065624, + "step": 5301 + }, + { + "epoch": 0.518283144309738, + "loss": 0.08060070872306824, + "loss_ce": 0.0038795156870037317, + "loss_iou": 0.2294921875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 514065624, + "step": 5301 + }, + { + "epoch": 0.5183809151349238, + "grad_norm": 15.364631528620766, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 514163344, + "step": 5302 + }, + { + "epoch": 0.5183809151349238, + "loss": 0.07545246183872223, + "loss_ce": 0.006230966188013554, + "loss_iou": 0.37890625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 514163344, + "step": 5302 + }, + { + "epoch": 0.5184786859601095, + "grad_norm": 6.56062512051593, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 514260312, + "step": 5303 + }, + { + "epoch": 0.5184786859601095, + "loss": 0.07477515935897827, + "loss_ce": 0.00818580575287342, + "loss_iou": 0.25390625, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 514260312, + "step": 5303 + }, + { + "epoch": 0.5185764567852953, + "grad_norm": 6.16522916930252, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 514357140, + "step": 5304 + }, + { + "epoch": 0.5185764567852953, + "loss": 0.06684578955173492, + "loss_ce": 0.005291835404932499, + "loss_iou": 0.35546875, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 514357140, + "step": 5304 + }, + { + "epoch": 0.518674227610481, + "grad_norm": 12.687265647296487, + "learning_rate": 5e-05, + "loss": 0.1181, + "num_input_tokens_seen": 514453932, + "step": 5305 + }, + { + "epoch": 0.518674227610481, + "loss": 0.13772690296173096, + "loss_ce": 0.0035106069408357143, + "loss_iou": 0.28515625, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 514453932, + "step": 5305 + }, + { + "epoch": 0.5187719984356668, + "grad_norm": 23.890470211885475, + "learning_rate": 5e-05, + "loss": 0.0973, + "num_input_tokens_seen": 514550904, + "step": 5306 + }, + { + "epoch": 0.5187719984356668, + "loss": 0.09536619484424591, + "loss_ce": 0.008223248645663261, + "loss_iou": 0.427734375, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 514550904, + "step": 5306 + }, + { + "epoch": 0.5188697692608526, + "grad_norm": 10.151818553080188, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 514647900, + "step": 5307 + }, + { + "epoch": 0.5188697692608526, + "loss": 0.06798243522644043, + "loss_ce": 0.004994155839085579, + "loss_iou": 0.43359375, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 514647900, + "step": 5307 + }, + { + "epoch": 0.5189675400860383, + "grad_norm": 11.058544683311347, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 514745292, + "step": 5308 + }, + { + "epoch": 0.5189675400860383, + "loss": 0.06423427909612656, + "loss_ce": 0.0033211989793926477, + "loss_iou": 0.41796875, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 514745292, + "step": 5308 + }, + { + "epoch": 0.5190653109112241, + "grad_norm": 10.06992675598339, + "learning_rate": 5e-05, + "loss": 0.0845, + "num_input_tokens_seen": 514842408, + "step": 5309 + }, + { + "epoch": 0.5190653109112241, + "loss": 0.08828000724315643, + "loss_ce": 0.004387186840176582, + "loss_iou": 0.330078125, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 514842408, + "step": 5309 + }, + { + "epoch": 0.5191630817364099, + "grad_norm": 10.94691008564467, + "learning_rate": 5e-05, + "loss": 0.0622, + "num_input_tokens_seen": 514940324, + "step": 5310 + }, + { + "epoch": 0.5191630817364099, + "loss": 0.07523372769355774, + "loss_ce": 0.003334309672936797, + "loss_iou": 0.3359375, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 514940324, + "step": 5310 + }, + { + "epoch": 0.5192608525615956, + "grad_norm": 5.168706408287578, + "learning_rate": 5e-05, + "loss": 0.0913, + "num_input_tokens_seen": 515037580, + "step": 5311 + }, + { + "epoch": 0.5192608525615956, + "loss": 0.10130859911441803, + "loss_ce": 0.01054169051349163, + "loss_iou": 0.232421875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 515037580, + "step": 5311 + }, + { + "epoch": 0.5193586233867814, + "grad_norm": 18.58259463723268, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 515134796, + "step": 5312 + }, + { + "epoch": 0.5193586233867814, + "loss": 0.07172095775604248, + "loss_ce": 0.0021179947070777416, + "loss_iou": 0.27734375, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 515134796, + "step": 5312 + }, + { + "epoch": 0.5194563942119671, + "grad_norm": 8.847423957627498, + "learning_rate": 5e-05, + "loss": 0.143, + "num_input_tokens_seen": 515232700, + "step": 5313 + }, + { + "epoch": 0.5194563942119671, + "loss": 0.09518872201442719, + "loss_ce": 0.007984739728271961, + "loss_iou": 0.255859375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 515232700, + "step": 5313 + }, + { + "epoch": 0.5195541650371529, + "grad_norm": 7.91227116392833, + "learning_rate": 5e-05, + "loss": 0.1134, + "num_input_tokens_seen": 515329724, + "step": 5314 + }, + { + "epoch": 0.5195541650371529, + "loss": 0.10730024427175522, + "loss_ce": 0.0055546388030052185, + "loss_iou": 0.37890625, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 515329724, + "step": 5314 + }, + { + "epoch": 0.5196519358623387, + "grad_norm": 8.420250411915976, + "learning_rate": 5e-05, + "loss": 0.0933, + "num_input_tokens_seen": 515426632, + "step": 5315 + }, + { + "epoch": 0.5196519358623387, + "loss": 0.10131266713142395, + "loss_ce": 0.0032749446108937263, + "loss_iou": 0.376953125, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 515426632, + "step": 5315 + }, + { + "epoch": 0.5197497066875244, + "grad_norm": 7.090072726335346, + "learning_rate": 5e-05, + "loss": 0.044, + "num_input_tokens_seen": 515524124, + "step": 5316 + }, + { + "epoch": 0.5197497066875244, + "loss": 0.04957909882068634, + "loss_ce": 0.005130245350301266, + "loss_iou": 0.380859375, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 515524124, + "step": 5316 + }, + { + "epoch": 0.5198474775127102, + "grad_norm": 15.244071590189638, + "learning_rate": 5e-05, + "loss": 0.0998, + "num_input_tokens_seen": 515621224, + "step": 5317 + }, + { + "epoch": 0.5198474775127102, + "loss": 0.06147795170545578, + "loss_ce": 0.0039523146115243435, + "loss_iou": 0.283203125, + "loss_num": 0.011474609375, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 515621224, + "step": 5317 + }, + { + "epoch": 0.519945248337896, + "grad_norm": 4.585914671987231, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 515717956, + "step": 5318 + }, + { + "epoch": 0.519945248337896, + "loss": 0.11446970701217651, + "loss_ce": 0.004703697748482227, + "loss_iou": 0.287109375, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 515717956, + "step": 5318 + }, + { + "epoch": 0.5200430191630817, + "grad_norm": 4.598512275924105, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 515814516, + "step": 5319 + }, + { + "epoch": 0.5200430191630817, + "loss": 0.06684067845344543, + "loss_ce": 0.008943110704421997, + "loss_iou": 0.1611328125, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 515814516, + "step": 5319 + }, + { + "epoch": 0.5201407899882675, + "grad_norm": 8.008136136879118, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 515912056, + "step": 5320 + }, + { + "epoch": 0.5201407899882675, + "loss": 0.07226557284593582, + "loss_ce": 0.005500744096934795, + "loss_iou": 0.33984375, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 515912056, + "step": 5320 + }, + { + "epoch": 0.5202385608134532, + "grad_norm": 14.227839614204692, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 516009144, + "step": 5321 + }, + { + "epoch": 0.5202385608134532, + "loss": 0.11950847506523132, + "loss_ce": 0.0036637503653764725, + "loss_iou": 0.341796875, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 516009144, + "step": 5321 + }, + { + "epoch": 0.520336331638639, + "grad_norm": 8.799077189285327, + "learning_rate": 5e-05, + "loss": 0.056, + "num_input_tokens_seen": 516106112, + "step": 5322 + }, + { + "epoch": 0.520336331638639, + "loss": 0.05596243962645531, + "loss_ce": 0.004067295230925083, + "loss_iou": 0.41015625, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 516106112, + "step": 5322 + }, + { + "epoch": 0.5204341024638248, + "grad_norm": 19.95784995987262, + "learning_rate": 5e-05, + "loss": 0.096, + "num_input_tokens_seen": 516202836, + "step": 5323 + }, + { + "epoch": 0.5204341024638248, + "loss": 0.11058966815471649, + "loss_ce": 0.006021196022629738, + "loss_iou": 0.201171875, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 516202836, + "step": 5323 + }, + { + "epoch": 0.5205318732890105, + "grad_norm": 15.367914818586877, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 516300264, + "step": 5324 + }, + { + "epoch": 0.5205318732890105, + "loss": 0.05692574754357338, + "loss_ce": 0.00881478562951088, + "loss_iou": 0.3203125, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 516300264, + "step": 5324 + }, + { + "epoch": 0.5206296441141963, + "grad_norm": 2.95740407794621, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 516396892, + "step": 5325 + }, + { + "epoch": 0.5206296441141963, + "loss": 0.07566149532794952, + "loss_ce": 0.003807853674516082, + "loss_iou": 0.244140625, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 516396892, + "step": 5325 + }, + { + "epoch": 0.5207274149393821, + "grad_norm": 13.780705598704527, + "learning_rate": 5e-05, + "loss": 0.073, + "num_input_tokens_seen": 516493860, + "step": 5326 + }, + { + "epoch": 0.5207274149393821, + "loss": 0.07819800823926926, + "loss_ce": 0.00480323052033782, + "loss_iou": 0.349609375, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 516493860, + "step": 5326 + }, + { + "epoch": 0.5208251857645678, + "grad_norm": 3.1741918644792317, + "learning_rate": 5e-05, + "loss": 0.092, + "num_input_tokens_seen": 516590128, + "step": 5327 + }, + { + "epoch": 0.5208251857645678, + "loss": 0.10595263540744781, + "loss_ce": 0.002990140113979578, + "loss_iou": 0.255859375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 516590128, + "step": 5327 + }, + { + "epoch": 0.5209229565897536, + "grad_norm": 9.25571515671517, + "learning_rate": 5e-05, + "loss": 0.0491, + "num_input_tokens_seen": 516686432, + "step": 5328 + }, + { + "epoch": 0.5209229565897536, + "loss": 0.04524010047316551, + "loss_ce": 0.006765060126781464, + "loss_iou": 0.296875, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 516686432, + "step": 5328 + }, + { + "epoch": 0.5210207274149394, + "grad_norm": 12.669849964546904, + "learning_rate": 5e-05, + "loss": 0.0477, + "num_input_tokens_seen": 516783256, + "step": 5329 + }, + { + "epoch": 0.5210207274149394, + "loss": 0.07128531485795975, + "loss_ce": 0.0032463748939335346, + "loss_iou": 0.25, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 516783256, + "step": 5329 + }, + { + "epoch": 0.5211184982401251, + "grad_norm": 3.8465134195198862, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 516880076, + "step": 5330 + }, + { + "epoch": 0.5211184982401251, + "loss": 0.05702945590019226, + "loss_ce": 0.005716058425605297, + "loss_iou": 0.3046875, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 516880076, + "step": 5330 + }, + { + "epoch": 0.5212162690653109, + "grad_norm": 16.192349910419104, + "learning_rate": 5e-05, + "loss": 0.1045, + "num_input_tokens_seen": 516976588, + "step": 5331 + }, + { + "epoch": 0.5212162690653109, + "loss": 0.12552890181541443, + "loss_ce": 0.0015207319520413876, + "loss_iou": 0.1953125, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 516976588, + "step": 5331 + }, + { + "epoch": 0.5213140398904966, + "grad_norm": 7.498534624171466, + "learning_rate": 5e-05, + "loss": 0.0906, + "num_input_tokens_seen": 517073204, + "step": 5332 + }, + { + "epoch": 0.5213140398904966, + "loss": 0.08924441039562225, + "loss_ce": 0.004130885004997253, + "loss_iou": 0.2431640625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 517073204, + "step": 5332 + }, + { + "epoch": 0.5214118107156824, + "grad_norm": 6.70429268823359, + "learning_rate": 5e-05, + "loss": 0.0873, + "num_input_tokens_seen": 517168976, + "step": 5333 + }, + { + "epoch": 0.5214118107156824, + "loss": 0.11014938354492188, + "loss_ce": 0.005954744294285774, + "loss_iou": 0.1953125, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 517168976, + "step": 5333 + }, + { + "epoch": 0.5215095815408682, + "grad_norm": 3.078079727344826, + "learning_rate": 5e-05, + "loss": 0.0922, + "num_input_tokens_seen": 517265616, + "step": 5334 + }, + { + "epoch": 0.5215095815408682, + "loss": 0.052149660885334015, + "loss_ce": 0.0033825719729065895, + "loss_iou": 0.265625, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 517265616, + "step": 5334 + }, + { + "epoch": 0.5216073523660539, + "grad_norm": 8.167513072384644, + "learning_rate": 5e-05, + "loss": 0.0479, + "num_input_tokens_seen": 517362900, + "step": 5335 + }, + { + "epoch": 0.5216073523660539, + "loss": 0.0385470986366272, + "loss_ce": 0.003879132680594921, + "loss_iou": 0.30859375, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 517362900, + "step": 5335 + }, + { + "epoch": 0.5217051231912397, + "grad_norm": 3.9001425778273084, + "learning_rate": 5e-05, + "loss": 0.0959, + "num_input_tokens_seen": 517459900, + "step": 5336 + }, + { + "epoch": 0.5217051231912397, + "loss": 0.08853350579738617, + "loss_ce": 0.0056477622129023075, + "loss_iou": 0.296875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 517459900, + "step": 5336 + }, + { + "epoch": 0.5218028940164255, + "grad_norm": 5.444621530720619, + "learning_rate": 5e-05, + "loss": 0.078, + "num_input_tokens_seen": 517555748, + "step": 5337 + }, + { + "epoch": 0.5218028940164255, + "loss": 0.08597664535045624, + "loss_ce": 0.005623862147331238, + "loss_iou": 0.271484375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 517555748, + "step": 5337 + }, + { + "epoch": 0.5219006648416112, + "grad_norm": 2.753936559514021, + "learning_rate": 5e-05, + "loss": 0.0576, + "num_input_tokens_seen": 517652484, + "step": 5338 + }, + { + "epoch": 0.5219006648416112, + "loss": 0.058465879410505295, + "loss_ce": 0.006433410104364157, + "loss_iou": 0.3046875, + "loss_num": 0.01043701171875, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 517652484, + "step": 5338 + }, + { + "epoch": 0.521998435666797, + "grad_norm": 6.988359028055793, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 517749824, + "step": 5339 + }, + { + "epoch": 0.521998435666797, + "loss": 0.06532877683639526, + "loss_ce": 0.006742651574313641, + "loss_iou": 0.2275390625, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 517749824, + "step": 5339 + }, + { + "epoch": 0.5220962064919827, + "grad_norm": 3.1110859039513805, + "learning_rate": 5e-05, + "loss": 0.0848, + "num_input_tokens_seen": 517847368, + "step": 5340 + }, + { + "epoch": 0.5220962064919827, + "loss": 0.040226250886917114, + "loss_ce": 0.004307062365114689, + "loss_iou": 0.37890625, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 517847368, + "step": 5340 + }, + { + "epoch": 0.5221939773171685, + "grad_norm": 4.3207744514011255, + "learning_rate": 5e-05, + "loss": 0.0557, + "num_input_tokens_seen": 517943656, + "step": 5341 + }, + { + "epoch": 0.5221939773171685, + "loss": 0.059578798711299896, + "loss_ce": 0.013942142948508263, + "loss_iou": 0.2109375, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 517943656, + "step": 5341 + }, + { + "epoch": 0.5222917481423544, + "grad_norm": 8.488315353215452, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 518041460, + "step": 5342 + }, + { + "epoch": 0.5222917481423544, + "loss": 0.10406696796417236, + "loss_ce": 0.007738227024674416, + "loss_iou": 0.322265625, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 518041460, + "step": 5342 + }, + { + "epoch": 0.52238951896754, + "grad_norm": 5.0260756490761835, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 518138196, + "step": 5343 + }, + { + "epoch": 0.52238951896754, + "loss": 0.08399072289466858, + "loss_ce": 0.003164922585710883, + "loss_iou": 0.3046875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 518138196, + "step": 5343 + }, + { + "epoch": 0.5224872897927259, + "grad_norm": 4.517662951312172, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 518234932, + "step": 5344 + }, + { + "epoch": 0.5224872897927259, + "loss": 0.04980210214853287, + "loss_ce": 0.004441537894308567, + "loss_iou": 0.353515625, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 518234932, + "step": 5344 + }, + { + "epoch": 0.5225850606179117, + "grad_norm": 19.18414485820421, + "learning_rate": 5e-05, + "loss": 0.067, + "num_input_tokens_seen": 518331908, + "step": 5345 + }, + { + "epoch": 0.5225850606179117, + "loss": 0.06695261597633362, + "loss_ce": 0.006161598023027182, + "loss_iou": 0.23828125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 518331908, + "step": 5345 + }, + { + "epoch": 0.5226828314430974, + "grad_norm": 16.478072686415747, + "learning_rate": 5e-05, + "loss": 0.1179, + "num_input_tokens_seen": 518429204, + "step": 5346 + }, + { + "epoch": 0.5226828314430974, + "loss": 0.11978960782289505, + "loss_ce": 0.004837521351873875, + "loss_iou": 0.349609375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 518429204, + "step": 5346 + }, + { + "epoch": 0.5227806022682832, + "grad_norm": 5.465685684520879, + "learning_rate": 5e-05, + "loss": 0.1005, + "num_input_tokens_seen": 518526552, + "step": 5347 + }, + { + "epoch": 0.5227806022682832, + "loss": 0.12010557949542999, + "loss_ce": 0.005756215192377567, + "loss_iou": 0.326171875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 518526552, + "step": 5347 + }, + { + "epoch": 0.522878373093469, + "grad_norm": 13.574414849362425, + "learning_rate": 5e-05, + "loss": 0.094, + "num_input_tokens_seen": 518623260, + "step": 5348 + }, + { + "epoch": 0.522878373093469, + "loss": 0.09316714853048325, + "loss_ce": 0.004116859752684832, + "loss_iou": 0.291015625, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 518623260, + "step": 5348 + }, + { + "epoch": 0.5229761439186547, + "grad_norm": 30.393673381105145, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 518719600, + "step": 5349 + }, + { + "epoch": 0.5229761439186547, + "loss": 0.10352769494056702, + "loss_ce": 0.004498153924942017, + "loss_iou": 0.298828125, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 518719600, + "step": 5349 + }, + { + "epoch": 0.5230739147438405, + "grad_norm": 12.487687918282106, + "learning_rate": 5e-05, + "loss": 0.1042, + "num_input_tokens_seen": 518816144, + "step": 5350 + }, + { + "epoch": 0.5230739147438405, + "loss": 0.09334097802639008, + "loss_ce": 0.0023375595919787884, + "loss_iou": 0.451171875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 518816144, + "step": 5350 + }, + { + "epoch": 0.5231716855690262, + "grad_norm": 5.463958332578462, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 518913616, + "step": 5351 + }, + { + "epoch": 0.5231716855690262, + "loss": 0.04512866213917732, + "loss_ce": 0.005665617994964123, + "loss_iou": 0.25, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 518913616, + "step": 5351 + }, + { + "epoch": 0.523269456394212, + "grad_norm": 8.177830462876166, + "learning_rate": 5e-05, + "loss": 0.1007, + "num_input_tokens_seen": 519009328, + "step": 5352 + }, + { + "epoch": 0.523269456394212, + "loss": 0.08460428565740585, + "loss_ce": 0.005716347135603428, + "loss_iou": 0.25, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 519009328, + "step": 5352 + }, + { + "epoch": 0.5233672272193978, + "grad_norm": 14.58627584507072, + "learning_rate": 5e-05, + "loss": 0.1094, + "num_input_tokens_seen": 519105984, + "step": 5353 + }, + { + "epoch": 0.5233672272193978, + "loss": 0.0663132444024086, + "loss_ce": 0.007765271235257387, + "loss_iou": 0.27734375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 519105984, + "step": 5353 + }, + { + "epoch": 0.5234649980445835, + "grad_norm": 8.476269137232162, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 519203140, + "step": 5354 + }, + { + "epoch": 0.5234649980445835, + "loss": 0.05883592367172241, + "loss_ce": 0.0052089085802435875, + "loss_iou": 0.326171875, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 519203140, + "step": 5354 + }, + { + "epoch": 0.5235627688697693, + "grad_norm": 11.138042680531866, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 519299968, + "step": 5355 + }, + { + "epoch": 0.5235627688697693, + "loss": 0.059799037873744965, + "loss_ce": 0.0036009156610816717, + "loss_iou": 0.423828125, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 519299968, + "step": 5355 + }, + { + "epoch": 0.5236605396949551, + "grad_norm": 4.193520846561583, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 519396700, + "step": 5356 + }, + { + "epoch": 0.5236605396949551, + "loss": 0.0575619712471962, + "loss_ce": 0.004819967318326235, + "loss_iou": 0.201171875, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 519396700, + "step": 5356 + }, + { + "epoch": 0.5237583105201408, + "grad_norm": 6.829270845075014, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 519493808, + "step": 5357 + }, + { + "epoch": 0.5237583105201408, + "loss": 0.06928057968616486, + "loss_ce": 0.0024165641516447067, + "loss_iou": 0.267578125, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 519493808, + "step": 5357 + }, + { + "epoch": 0.5238560813453266, + "grad_norm": 3.660974165391995, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 519590404, + "step": 5358 + }, + { + "epoch": 0.5238560813453266, + "loss": 0.07668228447437286, + "loss_ce": 0.004244999960064888, + "loss_iou": 0.22265625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 519590404, + "step": 5358 + }, + { + "epoch": 0.5239538521705123, + "grad_norm": 6.320104285517391, + "learning_rate": 5e-05, + "loss": 0.0682, + "num_input_tokens_seen": 519686772, + "step": 5359 + }, + { + "epoch": 0.5239538521705123, + "loss": 0.0835193544626236, + "loss_ce": 0.005287547595798969, + "loss_iou": 0.205078125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 519686772, + "step": 5359 + }, + { + "epoch": 0.5240516229956981, + "grad_norm": 1.8053786141315122, + "learning_rate": 5e-05, + "loss": 0.1071, + "num_input_tokens_seen": 519783896, + "step": 5360 + }, + { + "epoch": 0.5240516229956981, + "loss": 0.15787842869758606, + "loss_ce": 0.007915052585303783, + "loss_iou": 0.2490234375, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 519783896, + "step": 5360 + }, + { + "epoch": 0.5241493938208839, + "grad_norm": 9.054958634056094, + "learning_rate": 5e-05, + "loss": 0.0576, + "num_input_tokens_seen": 519879924, + "step": 5361 + }, + { + "epoch": 0.5241493938208839, + "loss": 0.04098637402057648, + "loss_ce": 0.0022977162152528763, + "loss_iou": 0.25, + "loss_num": 0.00775146484375, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 519879924, + "step": 5361 + }, + { + "epoch": 0.5242471646460696, + "grad_norm": 5.534800783584782, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 519976724, + "step": 5362 + }, + { + "epoch": 0.5242471646460696, + "loss": 0.060949333012104034, + "loss_ce": 0.005270009860396385, + "loss_iou": 0.267578125, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 519976724, + "step": 5362 + }, + { + "epoch": 0.5243449354712554, + "grad_norm": 31.81028349085567, + "learning_rate": 5e-05, + "loss": 0.1097, + "num_input_tokens_seen": 520072800, + "step": 5363 + }, + { + "epoch": 0.5243449354712554, + "loss": 0.09177561104297638, + "loss_ce": 0.007913309149444103, + "loss_iou": 0.2060546875, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 520072800, + "step": 5363 + }, + { + "epoch": 0.5244427062964412, + "grad_norm": 17.221020626037394, + "learning_rate": 5e-05, + "loss": 0.0598, + "num_input_tokens_seen": 520169520, + "step": 5364 + }, + { + "epoch": 0.5244427062964412, + "loss": 0.055305205285549164, + "loss_ce": 0.004096708260476589, + "loss_iou": 0.28515625, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 520169520, + "step": 5364 + }, + { + "epoch": 0.5245404771216269, + "grad_norm": 19.527488230837307, + "learning_rate": 5e-05, + "loss": 0.1038, + "num_input_tokens_seen": 520266140, + "step": 5365 + }, + { + "epoch": 0.5245404771216269, + "loss": 0.1473350077867508, + "loss_ce": 0.004360150080174208, + "loss_iou": 0.271484375, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 520266140, + "step": 5365 + }, + { + "epoch": 0.5246382479468127, + "grad_norm": 11.413439824755555, + "learning_rate": 5e-05, + "loss": 0.0727, + "num_input_tokens_seen": 520363676, + "step": 5366 + }, + { + "epoch": 0.5246382479468127, + "loss": 0.07831250131130219, + "loss_ce": 0.005100829526782036, + "loss_iou": 0.326171875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 520363676, + "step": 5366 + }, + { + "epoch": 0.5247360187719984, + "grad_norm": 5.673818750334565, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 520460100, + "step": 5367 + }, + { + "epoch": 0.5247360187719984, + "loss": 0.07943255454301834, + "loss_ce": 0.008982726372778416, + "loss_iou": 0.291015625, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 520460100, + "step": 5367 + }, + { + "epoch": 0.5248337895971842, + "grad_norm": 3.792015539988509, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 520558168, + "step": 5368 + }, + { + "epoch": 0.5248337895971842, + "loss": 0.06719937920570374, + "loss_ce": 0.002166423015296459, + "loss_iou": 0.3359375, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 520558168, + "step": 5368 + }, + { + "epoch": 0.52493156042237, + "grad_norm": 8.462249213686514, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 520655632, + "step": 5369 + }, + { + "epoch": 0.52493156042237, + "loss": 0.10692168027162552, + "loss_ce": 0.004687793552875519, + "loss_iou": 0.263671875, + "loss_num": 0.0205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 520655632, + "step": 5369 + }, + { + "epoch": 0.5250293312475557, + "grad_norm": 6.100392199334245, + "learning_rate": 5e-05, + "loss": 0.1046, + "num_input_tokens_seen": 520753060, + "step": 5370 + }, + { + "epoch": 0.5250293312475557, + "loss": 0.11860498785972595, + "loss_ce": 0.006788583472371101, + "loss_iou": 0.314453125, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 520753060, + "step": 5370 + }, + { + "epoch": 0.5251271020727415, + "grad_norm": 9.572821730391794, + "learning_rate": 5e-05, + "loss": 0.1118, + "num_input_tokens_seen": 520850016, + "step": 5371 + }, + { + "epoch": 0.5251271020727415, + "loss": 0.1422823965549469, + "loss_ce": 0.007219224702566862, + "loss_iou": 0.265625, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 520850016, + "step": 5371 + }, + { + "epoch": 0.5252248728979273, + "grad_norm": 7.7714163885626935, + "learning_rate": 5e-05, + "loss": 0.0771, + "num_input_tokens_seen": 520947136, + "step": 5372 + }, + { + "epoch": 0.5252248728979273, + "loss": 0.07920847833156586, + "loss_ce": 0.006965741980820894, + "loss_iou": 0.2734375, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 520947136, + "step": 5372 + }, + { + "epoch": 0.525322643723113, + "grad_norm": 12.664957622039347, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 521044604, + "step": 5373 + }, + { + "epoch": 0.525322643723113, + "loss": 0.07261675596237183, + "loss_ce": 0.0079042362049222, + "loss_iou": 0.236328125, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 521044604, + "step": 5373 + }, + { + "epoch": 0.5254204145482988, + "grad_norm": 7.674499909810266, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 521141724, + "step": 5374 + }, + { + "epoch": 0.5254204145482988, + "loss": 0.09800337255001068, + "loss_ce": 0.002758012618869543, + "loss_iou": 0.2412109375, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 521141724, + "step": 5374 + }, + { + "epoch": 0.5255181853734846, + "grad_norm": 3.2591898159874426, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 521238784, + "step": 5375 + }, + { + "epoch": 0.5255181853734846, + "loss": 0.09165433049201965, + "loss_ce": 0.008257420733571053, + "loss_iou": 0.29296875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 521238784, + "step": 5375 + }, + { + "epoch": 0.5256159561986703, + "grad_norm": 6.135770320998066, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 521336644, + "step": 5376 + }, + { + "epoch": 0.5256159561986703, + "loss": 0.06922909617424011, + "loss_ce": 0.00514218071475625, + "loss_iou": 0.283203125, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 521336644, + "step": 5376 + }, + { + "epoch": 0.5257137270238561, + "grad_norm": 12.923004218567838, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 521433872, + "step": 5377 + }, + { + "epoch": 0.5257137270238561, + "loss": 0.047770898789167404, + "loss_ce": 0.0034441170282661915, + "loss_iou": 0.16796875, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 521433872, + "step": 5377 + }, + { + "epoch": 0.5258114978490418, + "grad_norm": 6.301717969822925, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 521530840, + "step": 5378 + }, + { + "epoch": 0.5258114978490418, + "loss": 0.057645924389362335, + "loss_ce": 0.0037747668102383614, + "loss_iou": 0.345703125, + "loss_num": 0.0107421875, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 521530840, + "step": 5378 + }, + { + "epoch": 0.5259092686742276, + "grad_norm": 4.968263881152918, + "learning_rate": 5e-05, + "loss": 0.0675, + "num_input_tokens_seen": 521627824, + "step": 5379 + }, + { + "epoch": 0.5259092686742276, + "loss": 0.053591325879096985, + "loss_ce": 0.004732683300971985, + "loss_iou": 0.416015625, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 521627824, + "step": 5379 + }, + { + "epoch": 0.5260070394994134, + "grad_norm": 8.620848998749572, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 521724944, + "step": 5380 + }, + { + "epoch": 0.5260070394994134, + "loss": 0.08696691691875458, + "loss_ce": 0.001613064669072628, + "loss_iou": 0.357421875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 521724944, + "step": 5380 + }, + { + "epoch": 0.5261048103245991, + "grad_norm": 8.295987129065557, + "learning_rate": 5e-05, + "loss": 0.1082, + "num_input_tokens_seen": 521821532, + "step": 5381 + }, + { + "epoch": 0.5261048103245991, + "loss": 0.1410120129585266, + "loss_ce": 0.005880163982510567, + "loss_iou": 0.380859375, + "loss_num": 0.027099609375, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 521821532, + "step": 5381 + }, + { + "epoch": 0.5262025811497849, + "grad_norm": 19.0799504775513, + "learning_rate": 5e-05, + "loss": 0.0754, + "num_input_tokens_seen": 521918480, + "step": 5382 + }, + { + "epoch": 0.5262025811497849, + "loss": 0.08876897394657135, + "loss_ce": 0.008515369147062302, + "loss_iou": 0.326171875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 521918480, + "step": 5382 + }, + { + "epoch": 0.5263003519749707, + "grad_norm": 4.458884076659612, + "learning_rate": 5e-05, + "loss": 0.0817, + "num_input_tokens_seen": 522015472, + "step": 5383 + }, + { + "epoch": 0.5263003519749707, + "loss": 0.04712580889463425, + "loss_ce": 0.0056295329704880714, + "loss_iou": 0.28515625, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 522015472, + "step": 5383 + }, + { + "epoch": 0.5263981228001564, + "grad_norm": 8.471820952100837, + "learning_rate": 5e-05, + "loss": 0.0806, + "num_input_tokens_seen": 522111752, + "step": 5384 + }, + { + "epoch": 0.5263981228001564, + "loss": 0.1030314713716507, + "loss_ce": 0.004993753042072058, + "loss_iou": 0.37109375, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 522111752, + "step": 5384 + }, + { + "epoch": 0.5264958936253422, + "grad_norm": 15.268940231436153, + "learning_rate": 5e-05, + "loss": 0.0933, + "num_input_tokens_seen": 522208168, + "step": 5385 + }, + { + "epoch": 0.5264958936253422, + "loss": 0.07598131895065308, + "loss_ce": 0.0035993396304547787, + "loss_iou": 0.28515625, + "loss_num": 0.0145263671875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 522208168, + "step": 5385 + }, + { + "epoch": 0.5265936644505279, + "grad_norm": 6.355475601299585, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 522305396, + "step": 5386 + }, + { + "epoch": 0.5265936644505279, + "loss": 0.09391431510448456, + "loss_ce": 0.007770814001560211, + "loss_iou": 0.38671875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 522305396, + "step": 5386 + }, + { + "epoch": 0.5266914352757137, + "grad_norm": 9.33735890965403, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 522402784, + "step": 5387 + }, + { + "epoch": 0.5266914352757137, + "loss": 0.09117425233125687, + "loss_ce": 0.008349547162652016, + "loss_iou": 0.2734375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 522402784, + "step": 5387 + }, + { + "epoch": 0.5267892061008995, + "grad_norm": 5.295280163106288, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 522499472, + "step": 5388 + }, + { + "epoch": 0.5267892061008995, + "loss": 0.10781191289424896, + "loss_ce": 0.00411318801343441, + "loss_iou": 0.25, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 522499472, + "step": 5388 + }, + { + "epoch": 0.5268869769260852, + "grad_norm": 5.381119840222038, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 522595552, + "step": 5389 + }, + { + "epoch": 0.5268869769260852, + "loss": 0.0754023864865303, + "loss_ce": 0.00978577509522438, + "loss_iou": 0.2158203125, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 522595552, + "step": 5389 + }, + { + "epoch": 0.526984747751271, + "grad_norm": 14.782980428028171, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 522693284, + "step": 5390 + }, + { + "epoch": 0.526984747751271, + "loss": 0.08867158740758896, + "loss_ce": 0.008120440877974033, + "loss_iou": 0.25390625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 522693284, + "step": 5390 + }, + { + "epoch": 0.5270825185764568, + "grad_norm": 3.722400377518472, + "learning_rate": 5e-05, + "loss": 0.0861, + "num_input_tokens_seen": 522790188, + "step": 5391 + }, + { + "epoch": 0.5270825185764568, + "loss": 0.1022036075592041, + "loss_ce": 0.009094479493796825, + "loss_iou": 0.29296875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 522790188, + "step": 5391 + }, + { + "epoch": 0.5271802894016425, + "grad_norm": 9.990942157970958, + "learning_rate": 5e-05, + "loss": 0.0708, + "num_input_tokens_seen": 522888016, + "step": 5392 + }, + { + "epoch": 0.5271802894016425, + "loss": 0.06280651688575745, + "loss_ce": 0.007813839241862297, + "loss_iou": 0.328125, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 522888016, + "step": 5392 + }, + { + "epoch": 0.5272780602268283, + "grad_norm": 5.285027752014769, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 522984040, + "step": 5393 + }, + { + "epoch": 0.5272780602268283, + "loss": 0.0693739578127861, + "loss_ce": 0.002700679935514927, + "loss_iou": 0.267578125, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 522984040, + "step": 5393 + }, + { + "epoch": 0.5273758310520141, + "grad_norm": 6.620034153363883, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 523079988, + "step": 5394 + }, + { + "epoch": 0.5273758310520141, + "loss": 0.12370702624320984, + "loss_ce": 0.009021962061524391, + "loss_iou": 0.2490234375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 523079988, + "step": 5394 + }, + { + "epoch": 0.5274736018771998, + "grad_norm": 1.07429854142882, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 523176516, + "step": 5395 + }, + { + "epoch": 0.5274736018771998, + "loss": 0.04313573241233826, + "loss_ce": 0.008864492177963257, + "loss_iou": 0.216796875, + "loss_num": 0.006866455078125, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 523176516, + "step": 5395 + }, + { + "epoch": 0.5275713727023856, + "grad_norm": 6.092184667024538, + "learning_rate": 5e-05, + "loss": 0.0496, + "num_input_tokens_seen": 523273104, + "step": 5396 + }, + { + "epoch": 0.5275713727023856, + "loss": 0.06056653708219528, + "loss_ce": 0.0038801366463303566, + "loss_iou": 0.25390625, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 523273104, + "step": 5396 + }, + { + "epoch": 0.5276691435275713, + "grad_norm": 7.578776957551785, + "learning_rate": 5e-05, + "loss": 0.0527, + "num_input_tokens_seen": 523369188, + "step": 5397 + }, + { + "epoch": 0.5276691435275713, + "loss": 0.06095464900135994, + "loss_ce": 0.005446987226605415, + "loss_iou": 0.19921875, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 523369188, + "step": 5397 + }, + { + "epoch": 0.5277669143527571, + "grad_norm": 8.60703578423613, + "learning_rate": 5e-05, + "loss": 0.052, + "num_input_tokens_seen": 523466512, + "step": 5398 + }, + { + "epoch": 0.5277669143527571, + "loss": 0.032809846103191376, + "loss_ce": 0.003154389327391982, + "loss_iou": 0.2412109375, + "loss_num": 0.00592041015625, + "loss_xval": 0.0296630859375, + "num_input_tokens_seen": 523466512, + "step": 5398 + }, + { + "epoch": 0.5278646851779429, + "grad_norm": 5.230367134889981, + "learning_rate": 5e-05, + "loss": 0.1281, + "num_input_tokens_seen": 523563056, + "step": 5399 + }, + { + "epoch": 0.5278646851779429, + "loss": 0.11665427684783936, + "loss_ce": 0.00624168012291193, + "loss_iou": 0.2197265625, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 523563056, + "step": 5399 + }, + { + "epoch": 0.5279624560031286, + "grad_norm": 7.205566006521984, + "learning_rate": 5e-05, + "loss": 0.1005, + "num_input_tokens_seen": 523659892, + "step": 5400 + }, + { + "epoch": 0.5279624560031286, + "loss": 0.09003031998872757, + "loss_ce": 0.0030933700036257505, + "loss_iou": 0.34765625, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 523659892, + "step": 5400 + }, + { + "epoch": 0.5280602268283144, + "grad_norm": 17.696985561484368, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 523757340, + "step": 5401 + }, + { + "epoch": 0.5280602268283144, + "loss": 0.08573286235332489, + "loss_ce": 0.010552816092967987, + "loss_iou": 0.376953125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 523757340, + "step": 5401 + }, + { + "epoch": 0.5281579976535002, + "grad_norm": 8.356325387798796, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 523854232, + "step": 5402 + }, + { + "epoch": 0.5281579976535002, + "loss": 0.0731947273015976, + "loss_ce": 0.0035307237412780523, + "loss_iou": 0.314453125, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 523854232, + "step": 5402 + }, + { + "epoch": 0.5282557684786859, + "grad_norm": 11.338063132358366, + "learning_rate": 5e-05, + "loss": 0.0871, + "num_input_tokens_seen": 523951644, + "step": 5403 + }, + { + "epoch": 0.5282557684786859, + "loss": 0.06639629602432251, + "loss_ce": 0.007878842763602734, + "loss_iou": 0.255859375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 523951644, + "step": 5403 + }, + { + "epoch": 0.5283535393038717, + "grad_norm": 11.986600323141822, + "learning_rate": 5e-05, + "loss": 0.0974, + "num_input_tokens_seen": 524048152, + "step": 5404 + }, + { + "epoch": 0.5283535393038717, + "loss": 0.08950238674879074, + "loss_ce": 0.005048807244747877, + "loss_iou": 0.314453125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 524048152, + "step": 5404 + }, + { + "epoch": 0.5284513101290574, + "grad_norm": 23.049015484329097, + "learning_rate": 5e-05, + "loss": 0.077, + "num_input_tokens_seen": 524144140, + "step": 5405 + }, + { + "epoch": 0.5284513101290574, + "loss": 0.0717330351471901, + "loss_ce": 0.003091369988396764, + "loss_iou": 0.171875, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 524144140, + "step": 5405 + }, + { + "epoch": 0.5285490809542432, + "grad_norm": 9.583740959606766, + "learning_rate": 5e-05, + "loss": 0.0682, + "num_input_tokens_seen": 524241492, + "step": 5406 + }, + { + "epoch": 0.5285490809542432, + "loss": 0.07539738714694977, + "loss_ce": 0.0049628219567239285, + "loss_iou": 0.353515625, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 524241492, + "step": 5406 + }, + { + "epoch": 0.528646851779429, + "grad_norm": 7.368956717403124, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 524338696, + "step": 5407 + }, + { + "epoch": 0.528646851779429, + "loss": 0.06726396083831787, + "loss_ce": 0.0019105717074126005, + "loss_iou": 0.349609375, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 524338696, + "step": 5407 + }, + { + "epoch": 0.5287446226046147, + "grad_norm": 32.48948512208994, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 524435900, + "step": 5408 + }, + { + "epoch": 0.5287446226046147, + "loss": 0.0979757308959961, + "loss_ce": 0.007643701508641243, + "loss_iou": 0.3984375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 524435900, + "step": 5408 + }, + { + "epoch": 0.5288423934298006, + "grad_norm": 26.894186392859456, + "learning_rate": 5e-05, + "loss": 0.0631, + "num_input_tokens_seen": 524532292, + "step": 5409 + }, + { + "epoch": 0.5288423934298006, + "loss": 0.06275028735399246, + "loss_ce": 0.0051178401336073875, + "loss_iou": 0.30859375, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 524532292, + "step": 5409 + }, + { + "epoch": 0.5289401642549864, + "grad_norm": 11.368430130325429, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 524627840, + "step": 5410 + }, + { + "epoch": 0.5289401642549864, + "loss": 0.08909964561462402, + "loss_ce": 0.007972474209964275, + "loss_iou": 0.2314453125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 524627840, + "step": 5410 + }, + { + "epoch": 0.529037935080172, + "grad_norm": 4.8231637708780495, + "learning_rate": 5e-05, + "loss": 0.0697, + "num_input_tokens_seen": 524723596, + "step": 5411 + }, + { + "epoch": 0.529037935080172, + "loss": 0.07963196933269501, + "loss_ce": 0.0064355554059147835, + "loss_iou": 0.1513671875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 524723596, + "step": 5411 + }, + { + "epoch": 0.5291357059053579, + "grad_norm": 5.172457856994106, + "learning_rate": 5e-05, + "loss": 0.0911, + "num_input_tokens_seen": 524820360, + "step": 5412 + }, + { + "epoch": 0.5291357059053579, + "loss": 0.10685421526432037, + "loss_ce": 0.007397432811558247, + "loss_iou": 0.2373046875, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 524820360, + "step": 5412 + }, + { + "epoch": 0.5292334767305436, + "grad_norm": 6.126219368189578, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 524916620, + "step": 5413 + }, + { + "epoch": 0.5292334767305436, + "loss": 0.07706576585769653, + "loss_ce": 0.004685699474066496, + "loss_iou": 0.2451171875, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 524916620, + "step": 5413 + }, + { + "epoch": 0.5293312475557294, + "grad_norm": 16.799701910718387, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 525014052, + "step": 5414 + }, + { + "epoch": 0.5293312475557294, + "loss": 0.060811225324869156, + "loss_ce": 0.003918833564966917, + "loss_iou": 0.26171875, + "loss_num": 0.01141357421875, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 525014052, + "step": 5414 + }, + { + "epoch": 0.5294290183809152, + "grad_norm": 19.51859008676897, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 525111224, + "step": 5415 + }, + { + "epoch": 0.5294290183809152, + "loss": 0.10463665425777435, + "loss_ce": 0.008254511281847954, + "loss_iou": 0.3203125, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 525111224, + "step": 5415 + }, + { + "epoch": 0.5295267892061009, + "grad_norm": 7.3538823143767935, + "learning_rate": 5e-05, + "loss": 0.1048, + "num_input_tokens_seen": 525208604, + "step": 5416 + }, + { + "epoch": 0.5295267892061009, + "loss": 0.06593289971351624, + "loss_ce": 0.007529891096055508, + "loss_iou": 0.310546875, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 525208604, + "step": 5416 + }, + { + "epoch": 0.5296245600312867, + "grad_norm": 5.299553196304783, + "learning_rate": 5e-05, + "loss": 0.0893, + "num_input_tokens_seen": 525306260, + "step": 5417 + }, + { + "epoch": 0.5296245600312867, + "loss": 0.09024642407894135, + "loss_ce": 0.006765588186681271, + "loss_iou": 0.361328125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 525306260, + "step": 5417 + }, + { + "epoch": 0.5297223308564725, + "grad_norm": 11.563187425509133, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 525403908, + "step": 5418 + }, + { + "epoch": 0.5297223308564725, + "loss": 0.10014107078313828, + "loss_ce": 0.007108233869075775, + "loss_iou": 0.30078125, + "loss_num": 0.0185546875, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 525403908, + "step": 5418 + }, + { + "epoch": 0.5298201016816582, + "grad_norm": 7.448238710322672, + "learning_rate": 5e-05, + "loss": 0.0745, + "num_input_tokens_seen": 525500300, + "step": 5419 + }, + { + "epoch": 0.5298201016816582, + "loss": 0.05868389457464218, + "loss_ce": 0.0035538896918296814, + "loss_iou": 0.2734375, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 525500300, + "step": 5419 + }, + { + "epoch": 0.529917872506844, + "grad_norm": 2.3087521577069174, + "learning_rate": 5e-05, + "loss": 0.1061, + "num_input_tokens_seen": 525596652, + "step": 5420 + }, + { + "epoch": 0.529917872506844, + "loss": 0.10644067823886871, + "loss_ce": 0.016352791339159012, + "loss_iou": 0.2021484375, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 525596652, + "step": 5420 + }, + { + "epoch": 0.5300156433320298, + "grad_norm": 5.789192320056562, + "learning_rate": 5e-05, + "loss": 0.1031, + "num_input_tokens_seen": 525694244, + "step": 5421 + }, + { + "epoch": 0.5300156433320298, + "loss": 0.11346842348575592, + "loss_ce": 0.007908121682703495, + "loss_iou": 0.3671875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 525694244, + "step": 5421 + }, + { + "epoch": 0.5301134141572155, + "grad_norm": 6.397241009017076, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 525791040, + "step": 5422 + }, + { + "epoch": 0.5301134141572155, + "loss": 0.09955453127622604, + "loss_ce": 0.004438868723809719, + "loss_iou": 0.296875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 525791040, + "step": 5422 + }, + { + "epoch": 0.5302111849824013, + "grad_norm": 7.3788486242129645, + "learning_rate": 5e-05, + "loss": 0.0566, + "num_input_tokens_seen": 525889244, + "step": 5423 + }, + { + "epoch": 0.5302111849824013, + "loss": 0.06193208694458008, + "loss_ce": 0.004498002585023642, + "loss_iou": 0.310546875, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 525889244, + "step": 5423 + }, + { + "epoch": 0.530308955807587, + "grad_norm": 8.885996830511145, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 525986920, + "step": 5424 + }, + { + "epoch": 0.530308955807587, + "loss": 0.057438164949417114, + "loss_ce": 0.007343560457229614, + "loss_iou": 0.40234375, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 525986920, + "step": 5424 + }, + { + "epoch": 0.5304067266327728, + "grad_norm": 12.06699644681217, + "learning_rate": 5e-05, + "loss": 0.1014, + "num_input_tokens_seen": 526084112, + "step": 5425 + }, + { + "epoch": 0.5304067266327728, + "loss": 0.12488628178834915, + "loss_ce": 0.013039365410804749, + "loss_iou": 0.318359375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 526084112, + "step": 5425 + }, + { + "epoch": 0.5305044974579586, + "grad_norm": 13.320661408830263, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 526180848, + "step": 5426 + }, + { + "epoch": 0.5305044974579586, + "loss": 0.06730900704860687, + "loss_ce": 0.005831348709762096, + "loss_iou": 0.361328125, + "loss_num": 0.01226806640625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 526180848, + "step": 5426 + }, + { + "epoch": 0.5306022682831443, + "grad_norm": 9.30065538426501, + "learning_rate": 5e-05, + "loss": 0.0928, + "num_input_tokens_seen": 526278804, + "step": 5427 + }, + { + "epoch": 0.5306022682831443, + "loss": 0.08088679611682892, + "loss_ce": 0.007858235388994217, + "loss_iou": 0.349609375, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 526278804, + "step": 5427 + }, + { + "epoch": 0.5307000391083301, + "grad_norm": 13.186865794409904, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 526376048, + "step": 5428 + }, + { + "epoch": 0.5307000391083301, + "loss": 0.10992884635925293, + "loss_ce": 0.005955458618700504, + "loss_iou": 0.31640625, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 526376048, + "step": 5428 + }, + { + "epoch": 0.5307978099335159, + "grad_norm": 8.93897482756953, + "learning_rate": 5e-05, + "loss": 0.1013, + "num_input_tokens_seen": 526473748, + "step": 5429 + }, + { + "epoch": 0.5307978099335159, + "loss": 0.10340765118598938, + "loss_ce": 0.005080021917819977, + "loss_iou": 0.33984375, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 526473748, + "step": 5429 + }, + { + "epoch": 0.5308955807587016, + "grad_norm": 12.552589211556565, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 526571200, + "step": 5430 + }, + { + "epoch": 0.5308955807587016, + "loss": 0.08602246642112732, + "loss_ce": 0.009148690849542618, + "loss_iou": 0.408203125, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 526571200, + "step": 5430 + }, + { + "epoch": 0.5309933515838874, + "grad_norm": 23.518636392977363, + "learning_rate": 5e-05, + "loss": 0.0848, + "num_input_tokens_seen": 526668332, + "step": 5431 + }, + { + "epoch": 0.5309933515838874, + "loss": 0.08677216619253159, + "loss_ce": 0.006236276589334011, + "loss_iou": 0.375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 526668332, + "step": 5431 + }, + { + "epoch": 0.5310911224090731, + "grad_norm": 18.597284052252363, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 526765156, + "step": 5432 + }, + { + "epoch": 0.5310911224090731, + "loss": 0.08294031023979187, + "loss_ce": 0.008416379801928997, + "loss_iou": 0.361328125, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 526765156, + "step": 5432 + }, + { + "epoch": 0.5311888932342589, + "grad_norm": 6.034969736185151, + "learning_rate": 5e-05, + "loss": 0.101, + "num_input_tokens_seen": 526861096, + "step": 5433 + }, + { + "epoch": 0.5311888932342589, + "loss": 0.1187678873538971, + "loss_ce": 0.009900243952870369, + "loss_iou": 0.212890625, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 526861096, + "step": 5433 + }, + { + "epoch": 0.5312866640594447, + "grad_norm": 9.489548570479105, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 526957916, + "step": 5434 + }, + { + "epoch": 0.5312866640594447, + "loss": 0.06006408482789993, + "loss_ce": 0.007360225543379784, + "loss_iou": 0.27734375, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 526957916, + "step": 5434 + }, + { + "epoch": 0.5313844348846304, + "grad_norm": 5.185112407702597, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 527054556, + "step": 5435 + }, + { + "epoch": 0.5313844348846304, + "loss": 0.05332595854997635, + "loss_ce": 0.004757235758006573, + "loss_iou": 0.2578125, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 527054556, + "step": 5435 + }, + { + "epoch": 0.5314822057098162, + "grad_norm": 8.578296329946271, + "learning_rate": 5e-05, + "loss": 0.0491, + "num_input_tokens_seen": 527151024, + "step": 5436 + }, + { + "epoch": 0.5314822057098162, + "loss": 0.05645758658647537, + "loss_ce": 0.005722112022340298, + "loss_iou": 0.2021484375, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 527151024, + "step": 5436 + }, + { + "epoch": 0.531579976535002, + "grad_norm": 20.186773690290323, + "learning_rate": 5e-05, + "loss": 0.1102, + "num_input_tokens_seen": 527247572, + "step": 5437 + }, + { + "epoch": 0.531579976535002, + "loss": 0.14044910669326782, + "loss_ce": 0.0037303557619452477, + "loss_iou": 0.255859375, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 527247572, + "step": 5437 + }, + { + "epoch": 0.5316777473601877, + "grad_norm": 11.211748667311126, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 527344036, + "step": 5438 + }, + { + "epoch": 0.5316777473601877, + "loss": 0.08524811267852783, + "loss_ce": 0.008572695776820183, + "loss_iou": 0.1591796875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 527344036, + "step": 5438 + }, + { + "epoch": 0.5317755181853735, + "grad_norm": 6.482449115315265, + "learning_rate": 5e-05, + "loss": 0.0905, + "num_input_tokens_seen": 527440696, + "step": 5439 + }, + { + "epoch": 0.5317755181853735, + "loss": 0.09561134874820709, + "loss_ce": 0.0034711502958089113, + "loss_iou": 0.27734375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 527440696, + "step": 5439 + }, + { + "epoch": 0.5318732890105593, + "grad_norm": 8.826425678120115, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 527537984, + "step": 5440 + }, + { + "epoch": 0.5318732890105593, + "loss": 0.07641144096851349, + "loss_ce": 0.0042068567126989365, + "loss_iou": 0.34765625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 527537984, + "step": 5440 + }, + { + "epoch": 0.531971059835745, + "grad_norm": 2.189670089453956, + "learning_rate": 5e-05, + "loss": 0.104, + "num_input_tokens_seen": 527635604, + "step": 5441 + }, + { + "epoch": 0.531971059835745, + "loss": 0.1108681857585907, + "loss_ce": 0.01040431298315525, + "loss_iou": 0.357421875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 527635604, + "step": 5441 + }, + { + "epoch": 0.5320688306609308, + "grad_norm": 5.711505524175246, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 527732324, + "step": 5442 + }, + { + "epoch": 0.5320688306609308, + "loss": 0.07608262449502945, + "loss_ce": 0.002863321453332901, + "loss_iou": 0.26171875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 527732324, + "step": 5442 + }, + { + "epoch": 0.5321666014861165, + "grad_norm": 3.688078522297132, + "learning_rate": 5e-05, + "loss": 0.0465, + "num_input_tokens_seen": 527829380, + "step": 5443 + }, + { + "epoch": 0.5321666014861165, + "loss": 0.054687537252902985, + "loss_ce": 0.004879035521298647, + "loss_iou": 0.17578125, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 527829380, + "step": 5443 + }, + { + "epoch": 0.5322643723113023, + "grad_norm": 5.114887102866254, + "learning_rate": 5e-05, + "loss": 0.0922, + "num_input_tokens_seen": 527925996, + "step": 5444 + }, + { + "epoch": 0.5322643723113023, + "loss": 0.11418075114488602, + "loss_ce": 0.008895107544958591, + "loss_iou": 0.38671875, + "loss_num": 0.02099609375, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 527925996, + "step": 5444 + }, + { + "epoch": 0.5323621431364881, + "grad_norm": 8.929002464899003, + "learning_rate": 5e-05, + "loss": 0.0914, + "num_input_tokens_seen": 528023168, + "step": 5445 + }, + { + "epoch": 0.5323621431364881, + "loss": 0.10751878470182419, + "loss_ce": 0.005353566259145737, + "loss_iou": 0.298828125, + "loss_num": 0.0205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 528023168, + "step": 5445 + }, + { + "epoch": 0.5324599139616738, + "grad_norm": 21.740575735377817, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 528119812, + "step": 5446 + }, + { + "epoch": 0.5324599139616738, + "loss": 0.10025462508201599, + "loss_ce": 0.00938853994011879, + "loss_iou": 0.1943359375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 528119812, + "step": 5446 + }, + { + "epoch": 0.5325576847868596, + "grad_norm": 29.07715945726122, + "learning_rate": 5e-05, + "loss": 0.0873, + "num_input_tokens_seen": 528217488, + "step": 5447 + }, + { + "epoch": 0.5325576847868596, + "loss": 0.09247130155563354, + "loss_ce": 0.003421010449528694, + "loss_iou": 0.388671875, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 528217488, + "step": 5447 + }, + { + "epoch": 0.5326554556120454, + "grad_norm": 9.675948031155892, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 528314632, + "step": 5448 + }, + { + "epoch": 0.5326554556120454, + "loss": 0.1116148978471756, + "loss_ce": 0.006741241551935673, + "loss_iou": 0.376953125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 528314632, + "step": 5448 + }, + { + "epoch": 0.5327532264372311, + "grad_norm": 3.79974645808732, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 528411392, + "step": 5449 + }, + { + "epoch": 0.5327532264372311, + "loss": 0.07207667082548141, + "loss_ce": 0.008638259023427963, + "loss_iou": 0.263671875, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 528411392, + "step": 5449 + }, + { + "epoch": 0.5328509972624169, + "grad_norm": 7.296338131166009, + "learning_rate": 5e-05, + "loss": 0.0961, + "num_input_tokens_seen": 528508532, + "step": 5450 + }, + { + "epoch": 0.5328509972624169, + "loss": 0.1106114536523819, + "loss_ce": 0.0047841244377195835, + "loss_iou": 0.328125, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 528508532, + "step": 5450 + }, + { + "epoch": 0.5329487680876026, + "grad_norm": 2.8865369940689756, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 528606292, + "step": 5451 + }, + { + "epoch": 0.5329487680876026, + "loss": 0.09358645975589752, + "loss_ce": 0.00459719356149435, + "loss_iou": 0.3046875, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 528606292, + "step": 5451 + }, + { + "epoch": 0.5330465389127884, + "grad_norm": 4.677050621582458, + "learning_rate": 5e-05, + "loss": 0.0495, + "num_input_tokens_seen": 528702644, + "step": 5452 + }, + { + "epoch": 0.5330465389127884, + "loss": 0.042651593685150146, + "loss_ce": 0.0023683884646743536, + "loss_iou": 0.24609375, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 528702644, + "step": 5452 + }, + { + "epoch": 0.5331443097379742, + "grad_norm": 12.680392889067011, + "learning_rate": 5e-05, + "loss": 0.0585, + "num_input_tokens_seen": 528798916, + "step": 5453 + }, + { + "epoch": 0.5331443097379742, + "loss": 0.07590173929929733, + "loss_ce": 0.0018813539063557982, + "loss_iou": 0.318359375, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 528798916, + "step": 5453 + }, + { + "epoch": 0.5332420805631599, + "grad_norm": 7.528123538277337, + "learning_rate": 5e-05, + "loss": 0.0914, + "num_input_tokens_seen": 528896036, + "step": 5454 + }, + { + "epoch": 0.5332420805631599, + "loss": 0.0889691561460495, + "loss_ce": 0.005244177766144276, + "loss_iou": 0.294921875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 528896036, + "step": 5454 + }, + { + "epoch": 0.5333398513883457, + "grad_norm": 4.592009264377709, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 528992644, + "step": 5455 + }, + { + "epoch": 0.5333398513883457, + "loss": 0.06266535818576813, + "loss_ce": 0.006619825027883053, + "loss_iou": 0.248046875, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 528992644, + "step": 5455 + }, + { + "epoch": 0.5334376222135315, + "grad_norm": 7.231837780928267, + "learning_rate": 5e-05, + "loss": 0.1127, + "num_input_tokens_seen": 529089156, + "step": 5456 + }, + { + "epoch": 0.5334376222135315, + "loss": 0.13329559564590454, + "loss_ce": 0.004679256584495306, + "loss_iou": 0.43359375, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 529089156, + "step": 5456 + }, + { + "epoch": 0.5335353930387172, + "grad_norm": 6.522992502308903, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 529187028, + "step": 5457 + }, + { + "epoch": 0.5335353930387172, + "loss": 0.107246533036232, + "loss_ce": 0.004615917801856995, + "loss_iou": 0.28515625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 529187028, + "step": 5457 + }, + { + "epoch": 0.533633163863903, + "grad_norm": 3.5611459770848466, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 529283744, + "step": 5458 + }, + { + "epoch": 0.533633163863903, + "loss": 0.07794958353042603, + "loss_ce": 0.009132438339293003, + "loss_iou": 0.28125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 529283744, + "step": 5458 + }, + { + "epoch": 0.5337309346890887, + "grad_norm": 3.9069478035957395, + "learning_rate": 5e-05, + "loss": 0.0922, + "num_input_tokens_seen": 529379984, + "step": 5459 + }, + { + "epoch": 0.5337309346890887, + "loss": 0.12250380218029022, + "loss_ce": 0.003912493120878935, + "loss_iou": 0.37890625, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 529379984, + "step": 5459 + }, + { + "epoch": 0.5338287055142745, + "grad_norm": 3.224653787049269, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 529477072, + "step": 5460 + }, + { + "epoch": 0.5338287055142745, + "loss": 0.07651729881763458, + "loss_ce": 0.005426602903753519, + "loss_iou": 0.33203125, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 529477072, + "step": 5460 + }, + { + "epoch": 0.5339264763394603, + "grad_norm": 10.07845652614159, + "learning_rate": 5e-05, + "loss": 0.0873, + "num_input_tokens_seen": 529574064, + "step": 5461 + }, + { + "epoch": 0.5339264763394603, + "loss": 0.09070535004138947, + "loss_ce": 0.009131868369877338, + "loss_iou": 0.345703125, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 529574064, + "step": 5461 + }, + { + "epoch": 0.534024247164646, + "grad_norm": 8.628152296660348, + "learning_rate": 5e-05, + "loss": 0.0879, + "num_input_tokens_seen": 529671564, + "step": 5462 + }, + { + "epoch": 0.534024247164646, + "loss": 0.07437963783740997, + "loss_ce": 0.005760864354670048, + "loss_iou": 0.37109375, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 529671564, + "step": 5462 + }, + { + "epoch": 0.5341220179898318, + "grad_norm": 8.561531526423446, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 529768712, + "step": 5463 + }, + { + "epoch": 0.5341220179898318, + "loss": 0.06449538469314575, + "loss_ce": 0.002643885090947151, + "loss_iou": 0.294921875, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 529768712, + "step": 5463 + }, + { + "epoch": 0.5342197888150176, + "grad_norm": 9.921126200342151, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 529865928, + "step": 5464 + }, + { + "epoch": 0.5342197888150176, + "loss": 0.07915845513343811, + "loss_ce": 0.005565315019339323, + "loss_iou": 0.287109375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 529865928, + "step": 5464 + }, + { + "epoch": 0.5343175596402033, + "grad_norm": 8.025143199565923, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 529962064, + "step": 5465 + }, + { + "epoch": 0.5343175596402033, + "loss": 0.055682115256786346, + "loss_ce": 0.004565175622701645, + "loss_iou": 0.37109375, + "loss_num": 0.01025390625, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 529962064, + "step": 5465 + }, + { + "epoch": 0.5344153304653891, + "grad_norm": 9.598150179200006, + "learning_rate": 5e-05, + "loss": 0.0938, + "num_input_tokens_seen": 530058708, + "step": 5466 + }, + { + "epoch": 0.5344153304653891, + "loss": 0.12106262892484665, + "loss_ce": 0.008910526521503925, + "loss_iou": 0.322265625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 530058708, + "step": 5466 + }, + { + "epoch": 0.5345131012905749, + "grad_norm": 7.673480194378592, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 530156172, + "step": 5467 + }, + { + "epoch": 0.5345131012905749, + "loss": 0.08697905391454697, + "loss_ce": 0.006275320425629616, + "loss_iou": 0.31640625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 530156172, + "step": 5467 + }, + { + "epoch": 0.5346108721157606, + "grad_norm": 6.042168751819153, + "learning_rate": 5e-05, + "loss": 0.0922, + "num_input_tokens_seen": 530253184, + "step": 5468 + }, + { + "epoch": 0.5346108721157606, + "loss": 0.060046687722206116, + "loss_ce": 0.005023493431508541, + "loss_iou": 0.40234375, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 530253184, + "step": 5468 + }, + { + "epoch": 0.5347086429409464, + "grad_norm": 18.542638799363733, + "learning_rate": 5e-05, + "loss": 0.0493, + "num_input_tokens_seen": 530349720, + "step": 5469 + }, + { + "epoch": 0.5347086429409464, + "loss": 0.040665820240974426, + "loss_ce": 0.005601122509688139, + "loss_iou": 0.1904296875, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 530349720, + "step": 5469 + }, + { + "epoch": 0.5348064137661321, + "grad_norm": 8.173867258291553, + "learning_rate": 5e-05, + "loss": 0.0763, + "num_input_tokens_seen": 530447308, + "step": 5470 + }, + { + "epoch": 0.5348064137661321, + "loss": 0.06679360568523407, + "loss_ce": 0.008810209110379219, + "loss_iou": 0.392578125, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 530447308, + "step": 5470 + }, + { + "epoch": 0.5349041845913179, + "grad_norm": 12.07809612728015, + "learning_rate": 5e-05, + "loss": 0.1, + "num_input_tokens_seen": 530544860, + "step": 5471 + }, + { + "epoch": 0.5349041845913179, + "loss": 0.09496717154979706, + "loss_ce": 0.008083629421889782, + "loss_iou": 0.322265625, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 530544860, + "step": 5471 + }, + { + "epoch": 0.5350019554165037, + "grad_norm": 7.4012364572388325, + "learning_rate": 5e-05, + "loss": 0.1008, + "num_input_tokens_seen": 530640900, + "step": 5472 + }, + { + "epoch": 0.5350019554165037, + "loss": 0.11883523315191269, + "loss_ce": 0.007629175670444965, + "loss_iou": 0.27734375, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 530640900, + "step": 5472 + }, + { + "epoch": 0.5350997262416894, + "grad_norm": 8.924824230355059, + "learning_rate": 5e-05, + "loss": 0.0563, + "num_input_tokens_seen": 530737752, + "step": 5473 + }, + { + "epoch": 0.5350997262416894, + "loss": 0.043047573417425156, + "loss_ce": 0.002550747012719512, + "loss_iou": 0.353515625, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 530737752, + "step": 5473 + }, + { + "epoch": 0.5351974970668752, + "grad_norm": 6.887763564741437, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 530834724, + "step": 5474 + }, + { + "epoch": 0.5351974970668752, + "loss": 0.05881994217634201, + "loss_ce": 0.004223993513733149, + "loss_iou": 0.25, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 530834724, + "step": 5474 + }, + { + "epoch": 0.535295267892061, + "grad_norm": 8.031638017677922, + "learning_rate": 5e-05, + "loss": 0.0771, + "num_input_tokens_seen": 530932096, + "step": 5475 + }, + { + "epoch": 0.535295267892061, + "loss": 0.04621768370270729, + "loss_ce": 0.0038898028433322906, + "loss_iou": 0.3984375, + "loss_num": 0.00848388671875, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 530932096, + "step": 5475 + }, + { + "epoch": 0.5353930387172467, + "grad_norm": 3.4241543522557154, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 531029020, + "step": 5476 + }, + { + "epoch": 0.5353930387172467, + "loss": 0.09581659734249115, + "loss_ce": 0.005850775167346001, + "loss_iou": 0.341796875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 531029020, + "step": 5476 + }, + { + "epoch": 0.5354908095424326, + "grad_norm": 18.590841763306052, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 531126612, + "step": 5477 + }, + { + "epoch": 0.5354908095424326, + "loss": 0.1303917020559311, + "loss_ce": 0.002561194123700261, + "loss_iou": 0.326171875, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 531126612, + "step": 5477 + }, + { + "epoch": 0.5355885803676182, + "grad_norm": 8.884304091483795, + "learning_rate": 5e-05, + "loss": 0.0854, + "num_input_tokens_seen": 531223440, + "step": 5478 + }, + { + "epoch": 0.5355885803676182, + "loss": 0.09387266635894775, + "loss_ce": 0.0025488114915788174, + "loss_iou": 0.3515625, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 531223440, + "step": 5478 + }, + { + "epoch": 0.5356863511928041, + "grad_norm": 2.6536470964970267, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 531319844, + "step": 5479 + }, + { + "epoch": 0.5356863511928041, + "loss": 0.03925676643848419, + "loss_ce": 0.006320666521787643, + "loss_iou": 0.26953125, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 531319844, + "step": 5479 + }, + { + "epoch": 0.5357841220179899, + "grad_norm": 3.5175694202834107, + "learning_rate": 5e-05, + "loss": 0.0638, + "num_input_tokens_seen": 531416156, + "step": 5480 + }, + { + "epoch": 0.5357841220179899, + "loss": 0.05458417907357216, + "loss_ce": 0.0038372636772692204, + "loss_iou": 0.228515625, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 531416156, + "step": 5480 + }, + { + "epoch": 0.5358818928431756, + "grad_norm": 3.096298268481703, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 531512664, + "step": 5481 + }, + { + "epoch": 0.5358818928431756, + "loss": 0.0703592598438263, + "loss_ce": 0.0077066682279109955, + "loss_iou": 0.30859375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 531512664, + "step": 5481 + }, + { + "epoch": 0.5359796636683614, + "grad_norm": 3.683903252572516, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 531609892, + "step": 5482 + }, + { + "epoch": 0.5359796636683614, + "loss": 0.08858848363161087, + "loss_ce": 0.01020408608019352, + "loss_iou": 0.34765625, + "loss_num": 0.015625, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 531609892, + "step": 5482 + }, + { + "epoch": 0.5360774344935472, + "grad_norm": 3.7309405183079227, + "learning_rate": 5e-05, + "loss": 0.0656, + "num_input_tokens_seen": 531707052, + "step": 5483 + }, + { + "epoch": 0.5360774344935472, + "loss": 0.06406158208847046, + "loss_ce": 0.006673277355730534, + "loss_iou": 0.40625, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 531707052, + "step": 5483 + }, + { + "epoch": 0.5361752053187329, + "grad_norm": 10.023412767725908, + "learning_rate": 5e-05, + "loss": 0.0547, + "num_input_tokens_seen": 531804272, + "step": 5484 + }, + { + "epoch": 0.5361752053187329, + "loss": 0.047261856496334076, + "loss_ce": 0.0009819457773119211, + "loss_iou": 0.306640625, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 531804272, + "step": 5484 + }, + { + "epoch": 0.5362729761439187, + "grad_norm": 26.12760934730595, + "learning_rate": 5e-05, + "loss": 0.0933, + "num_input_tokens_seen": 531902088, + "step": 5485 + }, + { + "epoch": 0.5362729761439187, + "loss": 0.127323180437088, + "loss_ce": 0.009037057869136333, + "loss_iou": 0.359375, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 531902088, + "step": 5485 + }, + { + "epoch": 0.5363707469691045, + "grad_norm": 12.74658523280655, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 531999144, + "step": 5486 + }, + { + "epoch": 0.5363707469691045, + "loss": 0.07946904003620148, + "loss_ce": 0.0021222333889454603, + "loss_iou": 0.40625, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 531999144, + "step": 5486 + }, + { + "epoch": 0.5364685177942902, + "grad_norm": 3.6697550629751703, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 532096280, + "step": 5487 + }, + { + "epoch": 0.5364685177942902, + "loss": 0.04552343487739563, + "loss_ce": 0.003226070199161768, + "loss_iou": 0.296875, + "loss_num": 0.00848388671875, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 532096280, + "step": 5487 + }, + { + "epoch": 0.536566288619476, + "grad_norm": 10.441599652863541, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 532192736, + "step": 5488 + }, + { + "epoch": 0.536566288619476, + "loss": 0.0765182375907898, + "loss_ce": 0.00256651290692389, + "loss_iou": 0.2294921875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 532192736, + "step": 5488 + }, + { + "epoch": 0.5366640594446617, + "grad_norm": 3.9042378606965573, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 532289144, + "step": 5489 + }, + { + "epoch": 0.5366640594446617, + "loss": 0.07797638326883316, + "loss_ce": 0.0019265800947323442, + "loss_iou": 0.2421875, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 532289144, + "step": 5489 + }, + { + "epoch": 0.5367618302698475, + "grad_norm": 9.258146252693424, + "learning_rate": 5e-05, + "loss": 0.0572, + "num_input_tokens_seen": 532386144, + "step": 5490 + }, + { + "epoch": 0.5367618302698475, + "loss": 0.04245961830019951, + "loss_ce": 0.001804483705200255, + "loss_iou": 0.345703125, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 532386144, + "step": 5490 + }, + { + "epoch": 0.5368596010950333, + "grad_norm": 32.941354309959024, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 532482860, + "step": 5491 + }, + { + "epoch": 0.5368596010950333, + "loss": 0.08583889901638031, + "loss_ce": 0.0026632361114025116, + "loss_iou": 0.27734375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 532482860, + "step": 5491 + }, + { + "epoch": 0.536957371920219, + "grad_norm": 35.58592290217237, + "learning_rate": 5e-05, + "loss": 0.1193, + "num_input_tokens_seen": 532580888, + "step": 5492 + }, + { + "epoch": 0.536957371920219, + "loss": 0.12037676572799683, + "loss_ce": 0.0015413230285048485, + "loss_iou": 0.38671875, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 532580888, + "step": 5492 + }, + { + "epoch": 0.5370551427454048, + "grad_norm": 24.526327325313176, + "learning_rate": 5e-05, + "loss": 0.09, + "num_input_tokens_seen": 532678292, + "step": 5493 + }, + { + "epoch": 0.5370551427454048, + "loss": 0.10349838435649872, + "loss_ce": 0.004392544738948345, + "loss_iou": 0.279296875, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 532678292, + "step": 5493 + }, + { + "epoch": 0.5371529135705906, + "grad_norm": 41.20656703670846, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 532774232, + "step": 5494 + }, + { + "epoch": 0.5371529135705906, + "loss": 0.0865321010351181, + "loss_ce": 0.004058346152305603, + "loss_iou": 0.1962890625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 532774232, + "step": 5494 + }, + { + "epoch": 0.5372506843957763, + "grad_norm": 23.23179709675659, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 532871348, + "step": 5495 + }, + { + "epoch": 0.5372506843957763, + "loss": 0.05625326931476593, + "loss_ce": 0.0023287110961973667, + "loss_iou": 0.29296875, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 532871348, + "step": 5495 + }, + { + "epoch": 0.5373484552209621, + "grad_norm": 13.393921795643246, + "learning_rate": 5e-05, + "loss": 0.0861, + "num_input_tokens_seen": 532968432, + "step": 5496 + }, + { + "epoch": 0.5373484552209621, + "loss": 0.0703941285610199, + "loss_ce": 0.0012794409412890673, + "loss_iou": 0.271484375, + "loss_num": 0.0137939453125, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 532968432, + "step": 5496 + }, + { + "epoch": 0.5374462260461478, + "grad_norm": 9.426273326936222, + "learning_rate": 5e-05, + "loss": 0.0854, + "num_input_tokens_seen": 533065888, + "step": 5497 + }, + { + "epoch": 0.5374462260461478, + "loss": 0.0550236739218235, + "loss_ce": 0.0072484090924263, + "loss_iou": 0.267578125, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 533065888, + "step": 5497 + }, + { + "epoch": 0.5375439968713336, + "grad_norm": 3.0575980593627605, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 533163008, + "step": 5498 + }, + { + "epoch": 0.5375439968713336, + "loss": 0.03862662985920906, + "loss_ce": 0.006079633254557848, + "loss_iou": 0.28515625, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 533163008, + "step": 5498 + }, + { + "epoch": 0.5376417676965194, + "grad_norm": 10.800765747449908, + "learning_rate": 5e-05, + "loss": 0.0856, + "num_input_tokens_seen": 533260256, + "step": 5499 + }, + { + "epoch": 0.5376417676965194, + "loss": 0.103559210896492, + "loss_ce": 0.00838251318782568, + "loss_iou": 0.251953125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 533260256, + "step": 5499 + }, + { + "epoch": 0.5377395385217051, + "grad_norm": 4.401534918215736, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 533356832, + "step": 5500 + }, + { + "epoch": 0.5377395385217051, + "eval_seeclick_CIoU": 0.521246924996376, + "eval_seeclick_GIoU": 0.5293558835983276, + "eval_seeclick_IoU": 0.56265589594841, + "eval_seeclick_MAE_all": 0.06784800812602043, + "eval_seeclick_MAE_h": 0.04025768116116524, + "eval_seeclick_MAE_w": 0.08551005460321903, + "eval_seeclick_MAE_x": 0.10755724087357521, + "eval_seeclick_MAE_y": 0.03806707076728344, + "eval_seeclick_NUM_probability": 0.9997645616531372, + "eval_seeclick_inside_bbox": 0.8451704680919647, + "eval_seeclick_loss": 0.2710142135620117, + "eval_seeclick_loss_ce": 0.009601390454918146, + "eval_seeclick_loss_iou": 0.38427734375, + "eval_seeclick_loss_num": 0.05500030517578125, + "eval_seeclick_loss_xval": 0.2750244140625, + "eval_seeclick_runtime": 77.7822, + "eval_seeclick_samples_per_second": 0.553, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 533356832, + "step": 5500 + }, + { + "epoch": 0.5377395385217051, + "eval_icons_CIoU": 0.6510928273200989, + "eval_icons_GIoU": 0.6345515251159668, + "eval_icons_IoU": 0.6915538311004639, + "eval_icons_MAE_all": 0.07073802687227726, + "eval_icons_MAE_h": 0.0837131105363369, + "eval_icons_MAE_w": 0.057880932465195656, + "eval_icons_MAE_x": 0.06004432030022144, + "eval_icons_MAE_y": 0.08131375536322594, + "eval_icons_NUM_probability": 0.9999957382678986, + "eval_icons_inside_bbox": 0.7795138955116272, + "eval_icons_loss": 0.1909198760986328, + "eval_icons_loss_ce": 1.6679842929079314e-06, + "eval_icons_loss_iou": 0.33599853515625, + "eval_icons_loss_num": 0.039642333984375, + "eval_icons_loss_xval": 0.198150634765625, + "eval_icons_runtime": 87.0172, + "eval_icons_samples_per_second": 0.575, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 533356832, + "step": 5500 + }, + { + "epoch": 0.5377395385217051, + "eval_screenspot_CIoU": 0.27749159932136536, + "eval_screenspot_GIoU": 0.26528025170167285, + "eval_screenspot_IoU": 0.37780962387720746, + "eval_screenspot_MAE_all": 0.16838539888461432, + "eval_screenspot_MAE_h": 0.10751297076543172, + "eval_screenspot_MAE_w": 0.24282547583182654, + "eval_screenspot_MAE_x": 0.21760036796331406, + "eval_screenspot_MAE_y": 0.10560278842846553, + "eval_screenspot_NUM_probability": 0.9999956687291464, + "eval_screenspot_inside_bbox": 0.6033333341280619, + "eval_screenspot_loss": 0.6005665063858032, + "eval_screenspot_loss_ce": 0.02648935342828433, + "eval_screenspot_loss_iou": 0.3727620442708333, + "eval_screenspot_loss_num": 0.11461385091145833, + "eval_screenspot_loss_xval": 0.5730794270833334, + "eval_screenspot_runtime": 148.4417, + "eval_screenspot_samples_per_second": 0.6, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 533356832, + "step": 5500 + }, + { + "epoch": 0.5377395385217051, + "eval_compot_CIoU": 0.47688959538936615, + "eval_compot_GIoU": 0.46452033519744873, + "eval_compot_IoU": 0.5403349697589874, + "eval_compot_MAE_all": 0.09395622834563255, + "eval_compot_MAE_h": 0.06758393719792366, + "eval_compot_MAE_w": 0.12072406709194183, + "eval_compot_MAE_x": 0.11832063645124435, + "eval_compot_MAE_y": 0.06919628009200096, + "eval_compot_NUM_probability": 0.9999956786632538, + "eval_compot_inside_bbox": 0.7083333432674408, + "eval_compot_loss": 0.31661519408226013, + "eval_compot_loss_ce": 0.03731794282793999, + "eval_compot_loss_iou": 0.4293212890625, + "eval_compot_loss_num": 0.050289154052734375, + "eval_compot_loss_xval": 0.2514801025390625, + "eval_compot_runtime": 87.0655, + "eval_compot_samples_per_second": 0.574, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 533356832, + "step": 5500 + }, + { + "epoch": 0.5377395385217051, + "loss": 0.2654375433921814, + "loss_ce": 0.036189526319503784, + "loss_iou": 0.427734375, + "loss_num": 0.0458984375, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 533356832, + "step": 5500 + }, + { + "epoch": 0.5378373093468909, + "grad_norm": 6.313352495688259, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 533454932, + "step": 5501 + }, + { + "epoch": 0.5378373093468909, + "loss": 0.09408850967884064, + "loss_ce": 0.004565193317830563, + "loss_iou": 0.265625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 533454932, + "step": 5501 + }, + { + "epoch": 0.5379350801720767, + "grad_norm": 8.609106902410506, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 533552240, + "step": 5502 + }, + { + "epoch": 0.5379350801720767, + "loss": 0.05785040184855461, + "loss_ce": 0.004475157707929611, + "loss_iou": 0.369140625, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 533552240, + "step": 5502 + }, + { + "epoch": 0.5380328509972624, + "grad_norm": 45.19121532325894, + "learning_rate": 5e-05, + "loss": 0.102, + "num_input_tokens_seen": 533649236, + "step": 5503 + }, + { + "epoch": 0.5380328509972624, + "loss": 0.1023937538266182, + "loss_ce": 0.013053545728325844, + "loss_iou": 0.275390625, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 533649236, + "step": 5503 + }, + { + "epoch": 0.5381306218224482, + "grad_norm": 4.123394869143954, + "learning_rate": 5e-05, + "loss": 0.1205, + "num_input_tokens_seen": 533746784, + "step": 5504 + }, + { + "epoch": 0.5381306218224482, + "loss": 0.14684046804904938, + "loss_ce": 0.006261246278882027, + "loss_iou": 0.46875, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 533746784, + "step": 5504 + }, + { + "epoch": 0.5382283926476339, + "grad_norm": 11.797837935912426, + "learning_rate": 5e-05, + "loss": 0.06, + "num_input_tokens_seen": 533842668, + "step": 5505 + }, + { + "epoch": 0.5382283926476339, + "loss": 0.06697642803192139, + "loss_ce": 0.0032099499367177486, + "loss_iou": 0.302734375, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 533842668, + "step": 5505 + }, + { + "epoch": 0.5383261634728197, + "grad_norm": 9.391239624821383, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 533939784, + "step": 5506 + }, + { + "epoch": 0.5383261634728197, + "loss": 0.05171982944011688, + "loss_ce": 0.006668252870440483, + "loss_iou": 0.150390625, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 533939784, + "step": 5506 + }, + { + "epoch": 0.5384239342980055, + "grad_norm": 2.713455616130646, + "learning_rate": 5e-05, + "loss": 0.1102, + "num_input_tokens_seen": 534038088, + "step": 5507 + }, + { + "epoch": 0.5384239342980055, + "loss": 0.13430467247962952, + "loss_ce": 0.010739007964730263, + "loss_iou": 0.302734375, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 534038088, + "step": 5507 + }, + { + "epoch": 0.5385217051231912, + "grad_norm": 3.36889216989904, + "learning_rate": 5e-05, + "loss": 0.0675, + "num_input_tokens_seen": 534134740, + "step": 5508 + }, + { + "epoch": 0.5385217051231912, + "loss": 0.07826021313667297, + "loss_ce": 0.010465417057275772, + "loss_iou": 0.2890625, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 534134740, + "step": 5508 + }, + { + "epoch": 0.538619475948377, + "grad_norm": 3.269743328228982, + "learning_rate": 5e-05, + "loss": 0.0572, + "num_input_tokens_seen": 534232036, + "step": 5509 + }, + { + "epoch": 0.538619475948377, + "loss": 0.06429407000541687, + "loss_ce": 0.0020458423532545567, + "loss_iou": 0.328125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 534232036, + "step": 5509 + }, + { + "epoch": 0.5387172467735628, + "grad_norm": 7.168402679166791, + "learning_rate": 5e-05, + "loss": 0.0585, + "num_input_tokens_seen": 534329916, + "step": 5510 + }, + { + "epoch": 0.5387172467735628, + "loss": 0.04361458495259285, + "loss_ce": 0.007535179611295462, + "loss_iou": 0.30859375, + "loss_num": 0.0072021484375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 534329916, + "step": 5510 + }, + { + "epoch": 0.5388150175987485, + "grad_norm": 6.886566154122739, + "learning_rate": 5e-05, + "loss": 0.107, + "num_input_tokens_seen": 534427928, + "step": 5511 + }, + { + "epoch": 0.5388150175987485, + "loss": 0.11341839283704758, + "loss_ce": 0.0034025264903903008, + "loss_iou": 0.375, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 534427928, + "step": 5511 + }, + { + "epoch": 0.5389127884239343, + "grad_norm": 2.925145740012152, + "learning_rate": 5e-05, + "loss": 0.1009, + "num_input_tokens_seen": 534523636, + "step": 5512 + }, + { + "epoch": 0.5389127884239343, + "loss": 0.1117958053946495, + "loss_ce": 0.005968473386019468, + "loss_iou": 0.25, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 534523636, + "step": 5512 + }, + { + "epoch": 0.5390105592491201, + "grad_norm": 5.452070513438086, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 534620044, + "step": 5513 + }, + { + "epoch": 0.5390105592491201, + "loss": 0.07102695852518082, + "loss_ce": 0.005810895934700966, + "loss_iou": 0.29296875, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 534620044, + "step": 5513 + }, + { + "epoch": 0.5391083300743058, + "grad_norm": 4.5584392520566706, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 534717492, + "step": 5514 + }, + { + "epoch": 0.5391083300743058, + "loss": 0.07656148821115494, + "loss_ce": 0.003685510717332363, + "loss_iou": 0.3125, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 534717492, + "step": 5514 + }, + { + "epoch": 0.5392061008994916, + "grad_norm": 4.291978251766266, + "learning_rate": 5e-05, + "loss": 0.1177, + "num_input_tokens_seen": 534813980, + "step": 5515 + }, + { + "epoch": 0.5392061008994916, + "loss": 0.06589806824922562, + "loss_ce": 0.0059157670475542545, + "loss_iou": 0.3359375, + "loss_num": 0.011962890625, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 534813980, + "step": 5515 + }, + { + "epoch": 0.5393038717246773, + "grad_norm": 4.425836427801339, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 534911388, + "step": 5516 + }, + { + "epoch": 0.5393038717246773, + "loss": 0.054686352610588074, + "loss_ce": 0.002852248726412654, + "loss_iou": 0.2470703125, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 534911388, + "step": 5516 + }, + { + "epoch": 0.5394016425498631, + "grad_norm": 15.291197846220463, + "learning_rate": 5e-05, + "loss": 0.0566, + "num_input_tokens_seen": 535009016, + "step": 5517 + }, + { + "epoch": 0.5394016425498631, + "loss": 0.0585884191095829, + "loss_ce": 0.0039009186439216137, + "loss_iou": 0.353515625, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 535009016, + "step": 5517 + }, + { + "epoch": 0.5394994133750489, + "grad_norm": 25.47149427187987, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 535106260, + "step": 5518 + }, + { + "epoch": 0.5394994133750489, + "loss": 0.05599971488118172, + "loss_ce": 0.005302388221025467, + "loss_iou": 0.357421875, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 535106260, + "step": 5518 + }, + { + "epoch": 0.5395971842002346, + "grad_norm": 4.3048290656176, + "learning_rate": 5e-05, + "loss": 0.0726, + "num_input_tokens_seen": 535202812, + "step": 5519 + }, + { + "epoch": 0.5395971842002346, + "loss": 0.07549597322940826, + "loss_ce": 0.0037491482216864824, + "loss_iou": 0.220703125, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 535202812, + "step": 5519 + }, + { + "epoch": 0.5396949550254204, + "grad_norm": 9.032749268989226, + "learning_rate": 5e-05, + "loss": 0.0938, + "num_input_tokens_seen": 535299736, + "step": 5520 + }, + { + "epoch": 0.5396949550254204, + "loss": 0.09489826112985611, + "loss_ce": 0.003345525823533535, + "loss_iou": 0.302734375, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 535299736, + "step": 5520 + }, + { + "epoch": 0.5397927258506062, + "grad_norm": 2.854246473861585, + "learning_rate": 5e-05, + "loss": 0.088, + "num_input_tokens_seen": 535396284, + "step": 5521 + }, + { + "epoch": 0.5397927258506062, + "loss": 0.07612285017967224, + "loss_ce": 0.0071912724524736404, + "loss_iou": 0.2255859375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 535396284, + "step": 5521 + }, + { + "epoch": 0.5398904966757919, + "grad_norm": 3.0107071073544485, + "learning_rate": 5e-05, + "loss": 0.0499, + "num_input_tokens_seen": 535493400, + "step": 5522 + }, + { + "epoch": 0.5398904966757919, + "loss": 0.048961788415908813, + "loss_ce": 0.006363065913319588, + "loss_iou": 0.2041015625, + "loss_num": 0.008544921875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 535493400, + "step": 5522 + }, + { + "epoch": 0.5399882675009777, + "grad_norm": 2.682115090547814, + "learning_rate": 5e-05, + "loss": 0.0604, + "num_input_tokens_seen": 535590776, + "step": 5523 + }, + { + "epoch": 0.5399882675009777, + "loss": 0.05577249452471733, + "loss_ce": 0.005372712388634682, + "loss_iou": 0.30859375, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 535590776, + "step": 5523 + }, + { + "epoch": 0.5400860383261634, + "grad_norm": 6.620038589170596, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 535687356, + "step": 5524 + }, + { + "epoch": 0.5400860383261634, + "loss": 0.06585939973592758, + "loss_ce": 0.0045343260280787945, + "loss_iou": 0.2734375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 535687356, + "step": 5524 + }, + { + "epoch": 0.5401838091513492, + "grad_norm": 20.31750503664567, + "learning_rate": 5e-05, + "loss": 0.0819, + "num_input_tokens_seen": 535783808, + "step": 5525 + }, + { + "epoch": 0.5401838091513492, + "loss": 0.0770740956068039, + "loss_ce": 0.006609004456549883, + "loss_iou": 0.212890625, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 535783808, + "step": 5525 + }, + { + "epoch": 0.540281579976535, + "grad_norm": 35.73343315245643, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 535879700, + "step": 5526 + }, + { + "epoch": 0.540281579976535, + "loss": 0.05044127628207207, + "loss_ce": 0.005918988958001137, + "loss_iou": 0.2470703125, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 535879700, + "step": 5526 + }, + { + "epoch": 0.5403793508017207, + "grad_norm": 10.223776306513452, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 535976144, + "step": 5527 + }, + { + "epoch": 0.5403793508017207, + "loss": 0.0682532787322998, + "loss_ce": 0.0026099667884409428, + "loss_iou": 0.30859375, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 535976144, + "step": 5527 + }, + { + "epoch": 0.5404771216269065, + "grad_norm": 3.4880400874304964, + "learning_rate": 5e-05, + "loss": 0.0906, + "num_input_tokens_seen": 536072044, + "step": 5528 + }, + { + "epoch": 0.5404771216269065, + "loss": 0.05931428074836731, + "loss_ce": 0.015262153930962086, + "loss_iou": 0.2578125, + "loss_num": 0.00885009765625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 536072044, + "step": 5528 + }, + { + "epoch": 0.5405748924520923, + "grad_norm": 4.331432288515428, + "learning_rate": 5e-05, + "loss": 0.0402, + "num_input_tokens_seen": 536167944, + "step": 5529 + }, + { + "epoch": 0.5405748924520923, + "loss": 0.049350883811712265, + "loss_ce": 0.005993032827973366, + "loss_iou": 0.271484375, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 536167944, + "step": 5529 + }, + { + "epoch": 0.540672663277278, + "grad_norm": 9.422943259134808, + "learning_rate": 5e-05, + "loss": 0.0947, + "num_input_tokens_seen": 536264572, + "step": 5530 + }, + { + "epoch": 0.540672663277278, + "loss": 0.059942200779914856, + "loss_ce": 0.0025844110641628504, + "loss_iou": 0.208984375, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 536264572, + "step": 5530 + }, + { + "epoch": 0.5407704341024638, + "grad_norm": 13.037325290447725, + "learning_rate": 5e-05, + "loss": 0.0866, + "num_input_tokens_seen": 536362104, + "step": 5531 + }, + { + "epoch": 0.5407704341024638, + "loss": 0.08515925705432892, + "loss_ce": 0.0036277263425290585, + "loss_iou": 0.349609375, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 536362104, + "step": 5531 + }, + { + "epoch": 0.5408682049276496, + "grad_norm": 3.0352834178278862, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 536458984, + "step": 5532 + }, + { + "epoch": 0.5408682049276496, + "loss": 0.07604268938302994, + "loss_ce": 0.0063405465334653854, + "loss_iou": 0.373046875, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 536458984, + "step": 5532 + }, + { + "epoch": 0.5409659757528353, + "grad_norm": 4.320570775440216, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 536556672, + "step": 5533 + }, + { + "epoch": 0.5409659757528353, + "loss": 0.08079978823661804, + "loss_ce": 0.0036589845549315214, + "loss_iou": 0.361328125, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 536556672, + "step": 5533 + }, + { + "epoch": 0.5410637465780211, + "grad_norm": 3.23240223710683, + "learning_rate": 5e-05, + "loss": 0.0773, + "num_input_tokens_seen": 536653168, + "step": 5534 + }, + { + "epoch": 0.5410637465780211, + "loss": 0.08496596664190292, + "loss_ce": 0.005912087857723236, + "loss_iou": 0.2431640625, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 536653168, + "step": 5534 + }, + { + "epoch": 0.5411615174032068, + "grad_norm": 4.337474664275929, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 536751232, + "step": 5535 + }, + { + "epoch": 0.5411615174032068, + "loss": 0.09436674416065216, + "loss_ce": 0.005392747465521097, + "loss_iou": 0.318359375, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 536751232, + "step": 5535 + }, + { + "epoch": 0.5412592882283926, + "grad_norm": 6.149581856975619, + "learning_rate": 5e-05, + "loss": 0.0662, + "num_input_tokens_seen": 536848840, + "step": 5536 + }, + { + "epoch": 0.5412592882283926, + "loss": 0.04759161174297333, + "loss_ce": 0.0056604621931910515, + "loss_iou": 0.3828125, + "loss_num": 0.00836181640625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 536848840, + "step": 5536 + }, + { + "epoch": 0.5413570590535784, + "grad_norm": 9.355735803310631, + "learning_rate": 5e-05, + "loss": 0.0697, + "num_input_tokens_seen": 536945096, + "step": 5537 + }, + { + "epoch": 0.5413570590535784, + "loss": 0.07919254153966904, + "loss_ce": 0.010032080113887787, + "loss_iou": 0.255859375, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 536945096, + "step": 5537 + }, + { + "epoch": 0.5414548298787641, + "grad_norm": 3.363537007122804, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 537042472, + "step": 5538 + }, + { + "epoch": 0.5414548298787641, + "loss": 0.11503306031227112, + "loss_ce": 0.006680399179458618, + "loss_iou": 0.33203125, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 537042472, + "step": 5538 + }, + { + "epoch": 0.54155260070395, + "grad_norm": 6.417188511679128, + "learning_rate": 5e-05, + "loss": 0.06, + "num_input_tokens_seen": 537138500, + "step": 5539 + }, + { + "epoch": 0.54155260070395, + "loss": 0.03710617125034332, + "loss_ce": 0.002907410031184554, + "loss_iou": 0.1533203125, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 537138500, + "step": 5539 + }, + { + "epoch": 0.5416503715291358, + "grad_norm": 14.169249849059042, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 537235652, + "step": 5540 + }, + { + "epoch": 0.5416503715291358, + "loss": 0.09399573504924774, + "loss_ce": 0.0013901479542255402, + "loss_iou": 0.267578125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 537235652, + "step": 5540 + }, + { + "epoch": 0.5417481423543214, + "grad_norm": 16.057221528538896, + "learning_rate": 5e-05, + "loss": 0.0918, + "num_input_tokens_seen": 537333492, + "step": 5541 + }, + { + "epoch": 0.5417481423543214, + "loss": 0.08560654520988464, + "loss_ce": 0.004857037682086229, + "loss_iou": 0.2431640625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 537333492, + "step": 5541 + }, + { + "epoch": 0.5418459131795073, + "grad_norm": 21.368329691184023, + "learning_rate": 5e-05, + "loss": 0.0968, + "num_input_tokens_seen": 537430300, + "step": 5542 + }, + { + "epoch": 0.5418459131795073, + "loss": 0.11579576134681702, + "loss_ce": 0.00952591560781002, + "loss_iou": 0.2177734375, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 537430300, + "step": 5542 + }, + { + "epoch": 0.541943684004693, + "grad_norm": 9.568958544010208, + "learning_rate": 5e-05, + "loss": 0.0914, + "num_input_tokens_seen": 537527204, + "step": 5543 + }, + { + "epoch": 0.541943684004693, + "loss": 0.0773649662733078, + "loss_ce": 0.0045385826379060745, + "loss_iou": 0.26171875, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 537527204, + "step": 5543 + }, + { + "epoch": 0.5420414548298788, + "grad_norm": 8.164248933866684, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 537624232, + "step": 5544 + }, + { + "epoch": 0.5420414548298788, + "loss": 0.07242315262556076, + "loss_ce": 0.013402162119746208, + "loss_iou": 0.30859375, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 537624232, + "step": 5544 + }, + { + "epoch": 0.5421392256550646, + "grad_norm": 2.3918295275537513, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 537721584, + "step": 5545 + }, + { + "epoch": 0.5421392256550646, + "loss": 0.09016144275665283, + "loss_ce": 0.006245734170079231, + "loss_iou": 0.30859375, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 537721584, + "step": 5545 + }, + { + "epoch": 0.5422369964802503, + "grad_norm": 3.8728146021324625, + "learning_rate": 5e-05, + "loss": 0.0991, + "num_input_tokens_seen": 537818128, + "step": 5546 + }, + { + "epoch": 0.5422369964802503, + "loss": 0.10283677279949188, + "loss_ce": 0.007805037312209606, + "loss_iou": 0.259765625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 537818128, + "step": 5546 + }, + { + "epoch": 0.5423347673054361, + "grad_norm": 32.09578276736409, + "learning_rate": 5e-05, + "loss": 0.0899, + "num_input_tokens_seen": 537915336, + "step": 5547 + }, + { + "epoch": 0.5423347673054361, + "loss": 0.08736316114664078, + "loss_ce": 0.004538453184068203, + "loss_iou": 0.279296875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 537915336, + "step": 5547 + }, + { + "epoch": 0.5424325381306219, + "grad_norm": 4.218998455594964, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 538012040, + "step": 5548 + }, + { + "epoch": 0.5424325381306219, + "loss": 0.08430080115795135, + "loss_ce": 0.005969813093543053, + "loss_iou": 0.345703125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 538012040, + "step": 5548 + }, + { + "epoch": 0.5425303089558076, + "grad_norm": 10.959401684957829, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 538109252, + "step": 5549 + }, + { + "epoch": 0.5425303089558076, + "loss": 0.08433957397937775, + "loss_ce": 0.006966072134673595, + "loss_iou": 0.27734375, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 538109252, + "step": 5549 + }, + { + "epoch": 0.5426280797809934, + "grad_norm": 6.129984347834715, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 538205716, + "step": 5550 + }, + { + "epoch": 0.5426280797809934, + "loss": 0.07062479108572006, + "loss_ce": 0.007560213096439838, + "loss_iou": 0.40234375, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 538205716, + "step": 5550 + }, + { + "epoch": 0.5427258506061791, + "grad_norm": 5.307825224042477, + "learning_rate": 5e-05, + "loss": 0.1093, + "num_input_tokens_seen": 538302520, + "step": 5551 + }, + { + "epoch": 0.5427258506061791, + "loss": 0.14798253774642944, + "loss_ce": 0.004641470964998007, + "loss_iou": 0.265625, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 538302520, + "step": 5551 + }, + { + "epoch": 0.5428236214313649, + "grad_norm": 5.111008390886565, + "learning_rate": 5e-05, + "loss": 0.1317, + "num_input_tokens_seen": 538399524, + "step": 5552 + }, + { + "epoch": 0.5428236214313649, + "loss": 0.1323501467704773, + "loss_ce": 0.005686940159648657, + "loss_iou": 0.267578125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 538399524, + "step": 5552 + }, + { + "epoch": 0.5429213922565507, + "grad_norm": 5.347665724877978, + "learning_rate": 5e-05, + "loss": 0.1181, + "num_input_tokens_seen": 538496680, + "step": 5553 + }, + { + "epoch": 0.5429213922565507, + "loss": 0.12109917402267456, + "loss_ce": 0.003942186012864113, + "loss_iou": 0.2333984375, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 538496680, + "step": 5553 + }, + { + "epoch": 0.5430191630817364, + "grad_norm": 3.88269476659295, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 538593916, + "step": 5554 + }, + { + "epoch": 0.5430191630817364, + "loss": 0.06266330927610397, + "loss_ce": 0.005976908840239048, + "loss_iou": 0.3203125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 538593916, + "step": 5554 + }, + { + "epoch": 0.5431169339069222, + "grad_norm": 12.227339349004671, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 538690940, + "step": 5555 + }, + { + "epoch": 0.5431169339069222, + "loss": 0.05145822837948799, + "loss_ce": 0.006185399368405342, + "loss_iou": 0.390625, + "loss_num": 0.00909423828125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 538690940, + "step": 5555 + }, + { + "epoch": 0.543214704732108, + "grad_norm": 18.72472950457581, + "learning_rate": 5e-05, + "loss": 0.13, + "num_input_tokens_seen": 538787664, + "step": 5556 + }, + { + "epoch": 0.543214704732108, + "loss": 0.11564280092716217, + "loss_ce": 0.0032007875852286816, + "loss_iou": 0.412109375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 538787664, + "step": 5556 + }, + { + "epoch": 0.5433124755572937, + "grad_norm": 9.167894400125883, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 538884544, + "step": 5557 + }, + { + "epoch": 0.5433124755572937, + "loss": 0.06350209563970566, + "loss_ce": 0.002589011797681451, + "loss_iou": 0.3359375, + "loss_num": 0.01214599609375, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 538884544, + "step": 5557 + }, + { + "epoch": 0.5434102463824795, + "grad_norm": 3.7948063705955293, + "learning_rate": 5e-05, + "loss": 0.0819, + "num_input_tokens_seen": 538980408, + "step": 5558 + }, + { + "epoch": 0.5434102463824795, + "loss": 0.08105334639549255, + "loss_ce": 0.00533923739567399, + "loss_iou": 0.431640625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 538980408, + "step": 5558 + }, + { + "epoch": 0.5435080172076653, + "grad_norm": 3.208314887747416, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 539077136, + "step": 5559 + }, + { + "epoch": 0.5435080172076653, + "loss": 0.056285612285137177, + "loss_ce": 0.0059926435351371765, + "loss_iou": 0.3125, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 539077136, + "step": 5559 + }, + { + "epoch": 0.543605788032851, + "grad_norm": 6.0491737345362555, + "learning_rate": 5e-05, + "loss": 0.0527, + "num_input_tokens_seen": 539174324, + "step": 5560 + }, + { + "epoch": 0.543605788032851, + "loss": 0.048437658697366714, + "loss_ce": 0.002478185575455427, + "loss_iou": 0.318359375, + "loss_num": 0.00921630859375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 539174324, + "step": 5560 + }, + { + "epoch": 0.5437035588580368, + "grad_norm": 18.17680845310469, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 539271752, + "step": 5561 + }, + { + "epoch": 0.5437035588580368, + "loss": 0.07529962062835693, + "loss_ce": 0.002354981144890189, + "loss_iou": 0.25, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 539271752, + "step": 5561 + }, + { + "epoch": 0.5438013296832225, + "grad_norm": 2.960951989821826, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 539368252, + "step": 5562 + }, + { + "epoch": 0.5438013296832225, + "loss": 0.04480870068073273, + "loss_ce": 0.0031064278446137905, + "loss_iou": 0.32421875, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 539368252, + "step": 5562 + }, + { + "epoch": 0.5438991005084083, + "grad_norm": 11.93766511311124, + "learning_rate": 5e-05, + "loss": 0.0601, + "num_input_tokens_seen": 539465632, + "step": 5563 + }, + { + "epoch": 0.5438991005084083, + "loss": 0.046376608312129974, + "loss_ce": 0.006299398839473724, + "loss_iou": 0.33203125, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 539465632, + "step": 5563 + }, + { + "epoch": 0.5439968713335941, + "grad_norm": 5.42773157944188, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 539563384, + "step": 5564 + }, + { + "epoch": 0.5439968713335941, + "loss": 0.07647162675857544, + "loss_ce": 0.005487737711519003, + "loss_iou": 0.28515625, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 539563384, + "step": 5564 + }, + { + "epoch": 0.5440946421587798, + "grad_norm": 3.033092554686533, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 539661564, + "step": 5565 + }, + { + "epoch": 0.5440946421587798, + "loss": 0.07511039078235626, + "loss_ce": 0.005591352470219135, + "loss_iou": 0.322265625, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 539661564, + "step": 5565 + }, + { + "epoch": 0.5441924129839656, + "grad_norm": 6.703748423462979, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 539758440, + "step": 5566 + }, + { + "epoch": 0.5441924129839656, + "loss": 0.05841688811779022, + "loss_ce": 0.002043289365246892, + "loss_iou": 0.23046875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 539758440, + "step": 5566 + }, + { + "epoch": 0.5442901838091514, + "grad_norm": 2.2686610256788993, + "learning_rate": 5e-05, + "loss": 0.0545, + "num_input_tokens_seen": 539855848, + "step": 5567 + }, + { + "epoch": 0.5442901838091514, + "loss": 0.05075766146183014, + "loss_ce": 0.006415624171495438, + "loss_iou": 0.310546875, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 539855848, + "step": 5567 + }, + { + "epoch": 0.5443879546343371, + "grad_norm": 17.0312591816986, + "learning_rate": 5e-05, + "loss": 0.0921, + "num_input_tokens_seen": 539952192, + "step": 5568 + }, + { + "epoch": 0.5443879546343371, + "loss": 0.14267805218696594, + "loss_ce": 0.004433424677699804, + "loss_iou": 0.306640625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 539952192, + "step": 5568 + }, + { + "epoch": 0.5444857254595229, + "grad_norm": 4.420475460509864, + "learning_rate": 5e-05, + "loss": 0.051, + "num_input_tokens_seen": 540049244, + "step": 5569 + }, + { + "epoch": 0.5444857254595229, + "loss": 0.03392434120178223, + "loss_ce": 0.0077250003814697266, + "loss_iou": 0.30078125, + "loss_num": 0.0052490234375, + "loss_xval": 0.0262451171875, + "num_input_tokens_seen": 540049244, + "step": 5569 + }, + { + "epoch": 0.5445834962847086, + "grad_norm": 7.5254713780351885, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 540146856, + "step": 5570 + }, + { + "epoch": 0.5445834962847086, + "loss": 0.0816735327243805, + "loss_ce": 0.005715277977287769, + "loss_iou": 0.259765625, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 540146856, + "step": 5570 + }, + { + "epoch": 0.5446812671098944, + "grad_norm": 3.4392003586820135, + "learning_rate": 5e-05, + "loss": 0.035, + "num_input_tokens_seen": 540242932, + "step": 5571 + }, + { + "epoch": 0.5446812671098944, + "loss": 0.03219543769955635, + "loss_ce": 0.004927981644868851, + "loss_iou": 0.197265625, + "loss_num": 0.005462646484375, + "loss_xval": 0.0272216796875, + "num_input_tokens_seen": 540242932, + "step": 5571 + }, + { + "epoch": 0.5447790379350802, + "grad_norm": 32.840611250569246, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 540340344, + "step": 5572 + }, + { + "epoch": 0.5447790379350802, + "loss": 0.05449967831373215, + "loss_ce": 0.007884077727794647, + "loss_iou": 0.263671875, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 540340344, + "step": 5572 + }, + { + "epoch": 0.5448768087602659, + "grad_norm": 11.014327147201719, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 540436992, + "step": 5573 + }, + { + "epoch": 0.5448768087602659, + "loss": 0.06740789115428925, + "loss_ce": 0.007730770856142044, + "loss_iou": 0.3203125, + "loss_num": 0.011962890625, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 540436992, + "step": 5573 + }, + { + "epoch": 0.5449745795854517, + "grad_norm": 56.74249980642468, + "learning_rate": 5e-05, + "loss": 0.1033, + "num_input_tokens_seen": 540532968, + "step": 5574 + }, + { + "epoch": 0.5449745795854517, + "loss": 0.06355970352888107, + "loss_ce": 0.004187756218016148, + "loss_iou": 0.283203125, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 540532968, + "step": 5574 + }, + { + "epoch": 0.5450723504106375, + "grad_norm": 14.267307137251711, + "learning_rate": 5e-05, + "loss": 0.1123, + "num_input_tokens_seen": 540629692, + "step": 5575 + }, + { + "epoch": 0.5450723504106375, + "loss": 0.08929241448640823, + "loss_ce": 0.006620296277105808, + "loss_iou": 0.224609375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 540629692, + "step": 5575 + }, + { + "epoch": 0.5451701212358232, + "grad_norm": 7.3493144236878765, + "learning_rate": 5e-05, + "loss": 0.1053, + "num_input_tokens_seen": 540725856, + "step": 5576 + }, + { + "epoch": 0.5451701212358232, + "loss": 0.11296144872903824, + "loss_ce": 0.004166278522461653, + "loss_iou": 0.3046875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 540725856, + "step": 5576 + }, + { + "epoch": 0.545267892061009, + "grad_norm": 16.390430573191022, + "learning_rate": 5e-05, + "loss": 0.1097, + "num_input_tokens_seen": 540823616, + "step": 5577 + }, + { + "epoch": 0.545267892061009, + "loss": 0.12096138298511505, + "loss_ce": 0.008473594672977924, + "loss_iou": 0.380859375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 540823616, + "step": 5577 + }, + { + "epoch": 0.5453656628861948, + "grad_norm": 12.500830342681267, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 540920632, + "step": 5578 + }, + { + "epoch": 0.5453656628861948, + "loss": 0.06279131025075912, + "loss_ce": 0.0028395240660756826, + "loss_iou": 0.3828125, + "loss_num": 0.011962890625, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 540920632, + "step": 5578 + }, + { + "epoch": 0.5454634337113805, + "grad_norm": 3.733923846944519, + "learning_rate": 5e-05, + "loss": 0.0695, + "num_input_tokens_seen": 541017744, + "step": 5579 + }, + { + "epoch": 0.5454634337113805, + "loss": 0.07196980714797974, + "loss_ce": 0.004739582538604736, + "loss_iou": 0.25, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 541017744, + "step": 5579 + }, + { + "epoch": 0.5455612045365663, + "grad_norm": 35.382181306774385, + "learning_rate": 5e-05, + "loss": 0.0914, + "num_input_tokens_seen": 541114896, + "step": 5580 + }, + { + "epoch": 0.5455612045365663, + "loss": 0.0770048201084137, + "loss_ce": 0.0017332130810245872, + "loss_iou": 0.341796875, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 541114896, + "step": 5580 + }, + { + "epoch": 0.545658975361752, + "grad_norm": 12.13878770849737, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 541211900, + "step": 5581 + }, + { + "epoch": 0.545658975361752, + "loss": 0.11535574495792389, + "loss_ce": 0.008483189158141613, + "loss_iou": 0.35546875, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 541211900, + "step": 5581 + }, + { + "epoch": 0.5457567461869378, + "grad_norm": 8.514990026459175, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 541309368, + "step": 5582 + }, + { + "epoch": 0.5457567461869378, + "loss": 0.05247696861624718, + "loss_ce": 0.00360306678339839, + "loss_iou": 0.306640625, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 541309368, + "step": 5582 + }, + { + "epoch": 0.5458545170121236, + "grad_norm": 6.015163003101692, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 541406496, + "step": 5583 + }, + { + "epoch": 0.5458545170121236, + "loss": 0.0653533786535263, + "loss_ce": 0.004898054525256157, + "loss_iou": 0.310546875, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 541406496, + "step": 5583 + }, + { + "epoch": 0.5459522878373093, + "grad_norm": 7.278409270193955, + "learning_rate": 5e-05, + "loss": 0.0614, + "num_input_tokens_seen": 541502660, + "step": 5584 + }, + { + "epoch": 0.5459522878373093, + "loss": 0.0512794554233551, + "loss_ce": 0.007181557826697826, + "loss_iou": 0.3359375, + "loss_num": 0.0087890625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 541502660, + "step": 5584 + }, + { + "epoch": 0.5460500586624951, + "grad_norm": 3.353267984177384, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 541599312, + "step": 5585 + }, + { + "epoch": 0.5460500586624951, + "loss": 0.07510431110858917, + "loss_ce": 0.006104067899286747, + "loss_iou": 0.302734375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 541599312, + "step": 5585 + }, + { + "epoch": 0.5461478294876809, + "grad_norm": 3.2187403061278785, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 541696284, + "step": 5586 + }, + { + "epoch": 0.5461478294876809, + "loss": 0.08784317970275879, + "loss_ce": 0.008680583909153938, + "loss_iou": 0.283203125, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 541696284, + "step": 5586 + }, + { + "epoch": 0.5462456003128666, + "grad_norm": 5.855211009656388, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 541792932, + "step": 5587 + }, + { + "epoch": 0.5462456003128666, + "loss": 0.06255638599395752, + "loss_ce": 0.004679804667830467, + "loss_iou": 0.2470703125, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 541792932, + "step": 5587 + }, + { + "epoch": 0.5463433711380524, + "grad_norm": 18.61007326096788, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 541889556, + "step": 5588 + }, + { + "epoch": 0.5463433711380524, + "loss": 0.07593801617622375, + "loss_ce": 0.008372098207473755, + "loss_iou": 0.251953125, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 541889556, + "step": 5588 + }, + { + "epoch": 0.5464411419632381, + "grad_norm": 6.714964394536756, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 541986364, + "step": 5589 + }, + { + "epoch": 0.5464411419632381, + "loss": 0.049572233110666275, + "loss_ce": 0.008404018357396126, + "loss_iou": 0.322265625, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 541986364, + "step": 5589 + }, + { + "epoch": 0.5465389127884239, + "grad_norm": 7.690359947454587, + "learning_rate": 5e-05, + "loss": 0.0923, + "num_input_tokens_seen": 542082840, + "step": 5590 + }, + { + "epoch": 0.5465389127884239, + "loss": 0.07010459899902344, + "loss_ce": 0.004888528026640415, + "loss_iou": 0.232421875, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 542082840, + "step": 5590 + }, + { + "epoch": 0.5466366836136097, + "grad_norm": 5.467164235926179, + "learning_rate": 5e-05, + "loss": 0.0843, + "num_input_tokens_seen": 542179248, + "step": 5591 + }, + { + "epoch": 0.5466366836136097, + "loss": 0.10231223702430725, + "loss_ce": 0.009897374548017979, + "loss_iou": 0.265625, + "loss_num": 0.0185546875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 542179248, + "step": 5591 + }, + { + "epoch": 0.5467344544387954, + "grad_norm": 6.626423351431373, + "learning_rate": 5e-05, + "loss": 0.0972, + "num_input_tokens_seen": 542276264, + "step": 5592 + }, + { + "epoch": 0.5467344544387954, + "loss": 0.07027823477983475, + "loss_ce": 0.006527013145387173, + "loss_iou": 0.279296875, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 542276264, + "step": 5592 + }, + { + "epoch": 0.5468322252639812, + "grad_norm": 8.560248116049838, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 542373048, + "step": 5593 + }, + { + "epoch": 0.5468322252639812, + "loss": 0.10380859673023224, + "loss_ce": 0.00585479149594903, + "loss_iou": 0.369140625, + "loss_num": 0.01953125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 542373048, + "step": 5593 + }, + { + "epoch": 0.546929996089167, + "grad_norm": 12.979939071314497, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 542470060, + "step": 5594 + }, + { + "epoch": 0.546929996089167, + "loss": 0.054138585925102234, + "loss_ce": 0.0026249177753925323, + "loss_iou": 0.326171875, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 542470060, + "step": 5594 + }, + { + "epoch": 0.5470277669143527, + "grad_norm": 8.04116186829208, + "learning_rate": 5e-05, + "loss": 0.0965, + "num_input_tokens_seen": 542566744, + "step": 5595 + }, + { + "epoch": 0.5470277669143527, + "loss": 0.07672937214374542, + "loss_ce": 0.004509530961513519, + "loss_iou": 0.345703125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 542566744, + "step": 5595 + }, + { + "epoch": 0.5471255377395385, + "grad_norm": 4.559075942030936, + "learning_rate": 5e-05, + "loss": 0.1231, + "num_input_tokens_seen": 542663124, + "step": 5596 + }, + { + "epoch": 0.5471255377395385, + "loss": 0.11286567896604538, + "loss_ce": 0.005291217938065529, + "loss_iou": 0.296875, + "loss_num": 0.0216064453125, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 542663124, + "step": 5596 + }, + { + "epoch": 0.5472233085647242, + "grad_norm": 15.379516661916814, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 542759644, + "step": 5597 + }, + { + "epoch": 0.5472233085647242, + "loss": 0.0954374223947525, + "loss_ce": 0.009362590499222279, + "loss_iou": 0.25390625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 542759644, + "step": 5597 + }, + { + "epoch": 0.54732107938991, + "grad_norm": 22.382298357945572, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 542855984, + "step": 5598 + }, + { + "epoch": 0.54732107938991, + "loss": 0.11509057879447937, + "loss_ce": 0.004838199354708195, + "loss_iou": 0.267578125, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 542855984, + "step": 5598 + }, + { + "epoch": 0.5474188502150958, + "grad_norm": 12.810509724564987, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 542954064, + "step": 5599 + }, + { + "epoch": 0.5474188502150958, + "loss": 0.07582354545593262, + "loss_ce": 0.006594421342015266, + "loss_iou": 0.32421875, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 542954064, + "step": 5599 + }, + { + "epoch": 0.5475166210402815, + "grad_norm": 8.93168801836695, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 543051256, + "step": 5600 + }, + { + "epoch": 0.5475166210402815, + "loss": 0.040788471698760986, + "loss_ce": 0.004060564562678337, + "loss_iou": 0.357421875, + "loss_num": 0.007354736328125, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 543051256, + "step": 5600 + }, + { + "epoch": 0.5476143918654673, + "grad_norm": 6.204006645560986, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 543148508, + "step": 5601 + }, + { + "epoch": 0.5476143918654673, + "loss": 0.07846762239933014, + "loss_ce": 0.004637968726456165, + "loss_iou": 0.2412109375, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 543148508, + "step": 5601 + }, + { + "epoch": 0.5477121626906531, + "grad_norm": 17.61413117362344, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 543245740, + "step": 5602 + }, + { + "epoch": 0.5477121626906531, + "loss": 0.06234414502978325, + "loss_ce": 0.0047269556671381, + "loss_iou": 0.3515625, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 543245740, + "step": 5602 + }, + { + "epoch": 0.5478099335158388, + "grad_norm": 9.880149476391246, + "learning_rate": 5e-05, + "loss": 0.0717, + "num_input_tokens_seen": 543342112, + "step": 5603 + }, + { + "epoch": 0.5478099335158388, + "loss": 0.044766999781131744, + "loss_ce": 0.003339385148137808, + "loss_iou": 0.4140625, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 543342112, + "step": 5603 + }, + { + "epoch": 0.5479077043410246, + "grad_norm": 4.903096446902377, + "learning_rate": 5e-05, + "loss": 0.1287, + "num_input_tokens_seen": 543438784, + "step": 5604 + }, + { + "epoch": 0.5479077043410246, + "loss": 0.10096335411071777, + "loss_ce": 0.00661062728613615, + "loss_iou": 0.310546875, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 543438784, + "step": 5604 + }, + { + "epoch": 0.5480054751662105, + "grad_norm": 9.955185678093663, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 543536120, + "step": 5605 + }, + { + "epoch": 0.5480054751662105, + "loss": 0.0827416330575943, + "loss_ce": 0.008484743535518646, + "loss_iou": 0.294921875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 543536120, + "step": 5605 + }, + { + "epoch": 0.5481032459913961, + "grad_norm": 32.795378862844316, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 543632436, + "step": 5606 + }, + { + "epoch": 0.5481032459913961, + "loss": 0.0613892562687397, + "loss_ce": 0.005526829976588488, + "loss_iou": 0.23828125, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 543632436, + "step": 5606 + }, + { + "epoch": 0.548201016816582, + "grad_norm": 17.12182141922012, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 543729824, + "step": 5607 + }, + { + "epoch": 0.548201016816582, + "loss": 0.09065289795398712, + "loss_ce": 0.005066355690360069, + "loss_iou": 0.28515625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 543729824, + "step": 5607 + }, + { + "epoch": 0.5482987876417676, + "grad_norm": 10.869133755166457, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 543826304, + "step": 5608 + }, + { + "epoch": 0.5482987876417676, + "loss": 0.07273422181606293, + "loss_ce": 0.003440241329371929, + "loss_iou": 0.298828125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 543826304, + "step": 5608 + }, + { + "epoch": 0.5483965584669535, + "grad_norm": 10.588596195486836, + "learning_rate": 5e-05, + "loss": 0.0965, + "num_input_tokens_seen": 543923500, + "step": 5609 + }, + { + "epoch": 0.5483965584669535, + "loss": 0.1173424944281578, + "loss_ce": 0.009005092084407806, + "loss_iou": 0.267578125, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 543923500, + "step": 5609 + }, + { + "epoch": 0.5484943292921393, + "grad_norm": 4.643819834021868, + "learning_rate": 5e-05, + "loss": 0.0967, + "num_input_tokens_seen": 544021188, + "step": 5610 + }, + { + "epoch": 0.5484943292921393, + "loss": 0.09318221360445023, + "loss_ce": 0.005062703974545002, + "loss_iou": 0.373046875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 544021188, + "step": 5610 + }, + { + "epoch": 0.548592100117325, + "grad_norm": 12.742135647904659, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 544118048, + "step": 5611 + }, + { + "epoch": 0.548592100117325, + "loss": 0.05957220122218132, + "loss_ce": 0.00603673979640007, + "loss_iou": 0.337890625, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 544118048, + "step": 5611 + }, + { + "epoch": 0.5486898709425108, + "grad_norm": 16.155266792713174, + "learning_rate": 5e-05, + "loss": 0.0994, + "num_input_tokens_seen": 544215024, + "step": 5612 + }, + { + "epoch": 0.5486898709425108, + "loss": 0.07940376549959183, + "loss_ce": 0.009121784940361977, + "loss_iou": 0.28515625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 544215024, + "step": 5612 + }, + { + "epoch": 0.5487876417676966, + "grad_norm": 12.230950579562233, + "learning_rate": 5e-05, + "loss": 0.0806, + "num_input_tokens_seen": 544312236, + "step": 5613 + }, + { + "epoch": 0.5487876417676966, + "loss": 0.07576785236597061, + "loss_ce": 0.008125638589262962, + "loss_iou": 0.255859375, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 544312236, + "step": 5613 + }, + { + "epoch": 0.5488854125928823, + "grad_norm": 10.163119686081775, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 544408400, + "step": 5614 + }, + { + "epoch": 0.5488854125928823, + "loss": 0.09279634058475494, + "loss_ce": 0.003265396226197481, + "loss_iou": 0.2431640625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 544408400, + "step": 5614 + }, + { + "epoch": 0.5489831834180681, + "grad_norm": 9.117239077412322, + "learning_rate": 5e-05, + "loss": 0.0594, + "num_input_tokens_seen": 544505436, + "step": 5615 + }, + { + "epoch": 0.5489831834180681, + "loss": 0.07221982628107071, + "loss_ce": 0.007202127017080784, + "loss_iou": 0.33984375, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 544505436, + "step": 5615 + }, + { + "epoch": 0.5490809542432538, + "grad_norm": 9.332449770895813, + "learning_rate": 5e-05, + "loss": 0.0703, + "num_input_tokens_seen": 544602216, + "step": 5616 + }, + { + "epoch": 0.5490809542432538, + "loss": 0.07356008887290955, + "loss_ce": 0.007016516290605068, + "loss_iou": 0.28515625, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 544602216, + "step": 5616 + }, + { + "epoch": 0.5491787250684396, + "grad_norm": 4.024175118183321, + "learning_rate": 5e-05, + "loss": 0.0911, + "num_input_tokens_seen": 544700536, + "step": 5617 + }, + { + "epoch": 0.5491787250684396, + "loss": 0.10722164809703827, + "loss_ce": 0.006116914562880993, + "loss_iou": 0.328125, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 544700536, + "step": 5617 + }, + { + "epoch": 0.5492764958936254, + "grad_norm": 12.174817269131243, + "learning_rate": 5e-05, + "loss": 0.1143, + "num_input_tokens_seen": 544797564, + "step": 5618 + }, + { + "epoch": 0.5492764958936254, + "loss": 0.07124696671962738, + "loss_ce": 0.005878317169845104, + "loss_iou": 0.373046875, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 544797564, + "step": 5618 + }, + { + "epoch": 0.5493742667188111, + "grad_norm": 13.733372934656083, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 544893884, + "step": 5619 + }, + { + "epoch": 0.5493742667188111, + "loss": 0.0734301507472992, + "loss_ce": 0.006589029915630817, + "loss_iou": 0.2734375, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 544893884, + "step": 5619 + }, + { + "epoch": 0.5494720375439969, + "grad_norm": 3.8929847941361704, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 544990940, + "step": 5620 + }, + { + "epoch": 0.5494720375439969, + "loss": 0.09613855928182602, + "loss_ce": 0.008152567781507969, + "loss_iou": 0.1904296875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 544990940, + "step": 5620 + }, + { + "epoch": 0.5495698083691827, + "grad_norm": 12.022240469223942, + "learning_rate": 5e-05, + "loss": 0.0994, + "num_input_tokens_seen": 545088376, + "step": 5621 + }, + { + "epoch": 0.5495698083691827, + "loss": 0.09900788962841034, + "loss_ce": 0.013665484264492989, + "loss_iou": 0.3515625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 545088376, + "step": 5621 + }, + { + "epoch": 0.5496675791943684, + "grad_norm": 7.996804449182283, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 545184808, + "step": 5622 + }, + { + "epoch": 0.5496675791943684, + "loss": 0.08360343426465988, + "loss_ce": 0.0051274788565933704, + "loss_iou": 0.24609375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 545184808, + "step": 5622 + }, + { + "epoch": 0.5497653500195542, + "grad_norm": 3.914266241570218, + "learning_rate": 5e-05, + "loss": 0.0462, + "num_input_tokens_seen": 545281952, + "step": 5623 + }, + { + "epoch": 0.5497653500195542, + "loss": 0.032371509820222855, + "loss_ce": 0.0014037959044799209, + "loss_iou": 0.271484375, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 545281952, + "step": 5623 + }, + { + "epoch": 0.54986312084474, + "grad_norm": 9.950873700242175, + "learning_rate": 5e-05, + "loss": 0.0539, + "num_input_tokens_seen": 545378400, + "step": 5624 + }, + { + "epoch": 0.54986312084474, + "loss": 0.04967357963323593, + "loss_ce": 0.006586574949324131, + "loss_iou": 0.26171875, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 545378400, + "step": 5624 + }, + { + "epoch": 0.5499608916699257, + "grad_norm": 3.3594469760401213, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 545475116, + "step": 5625 + }, + { + "epoch": 0.5499608916699257, + "loss": 0.05486693233251572, + "loss_ce": 0.005663060583174229, + "loss_iou": 0.232421875, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 545475116, + "step": 5625 + }, + { + "epoch": 0.5500586624951115, + "grad_norm": 13.093681499983576, + "learning_rate": 5e-05, + "loss": 0.1315, + "num_input_tokens_seen": 545572188, + "step": 5626 + }, + { + "epoch": 0.5500586624951115, + "loss": 0.10261310636997223, + "loss_ce": 0.003430977463722229, + "loss_iou": 0.337890625, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 545572188, + "step": 5626 + }, + { + "epoch": 0.5501564333202972, + "grad_norm": 4.092922659470928, + "learning_rate": 5e-05, + "loss": 0.0899, + "num_input_tokens_seen": 545669216, + "step": 5627 + }, + { + "epoch": 0.5501564333202972, + "loss": 0.08405417948961258, + "loss_ce": 0.004113385919481516, + "loss_iou": 0.330078125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 545669216, + "step": 5627 + }, + { + "epoch": 0.550254204145483, + "grad_norm": 5.145309453141946, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 545765992, + "step": 5628 + }, + { + "epoch": 0.550254204145483, + "loss": 0.07228126376867294, + "loss_ce": 0.005981826223433018, + "loss_iou": 0.287109375, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 545765992, + "step": 5628 + }, + { + "epoch": 0.5503519749706688, + "grad_norm": 12.969948385461691, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 545863044, + "step": 5629 + }, + { + "epoch": 0.5503519749706688, + "loss": 0.09087055921554565, + "loss_ce": 0.0068556638434529305, + "loss_iou": 0.28515625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 545863044, + "step": 5629 + }, + { + "epoch": 0.5504497457958545, + "grad_norm": 13.229615464770987, + "learning_rate": 5e-05, + "loss": 0.0773, + "num_input_tokens_seen": 545959548, + "step": 5630 + }, + { + "epoch": 0.5504497457958545, + "loss": 0.09664277732372284, + "loss_ce": 0.005639361217617989, + "loss_iou": 0.296875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 545959548, + "step": 5630 + }, + { + "epoch": 0.5505475166210403, + "grad_norm": 6.789850221246819, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 546056004, + "step": 5631 + }, + { + "epoch": 0.5505475166210403, + "loss": 0.03594181686639786, + "loss_ce": 0.006110886577516794, + "loss_iou": 0.279296875, + "loss_num": 0.005950927734375, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 546056004, + "step": 5631 + }, + { + "epoch": 0.5506452874462261, + "grad_norm": 12.220873551057263, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 546152468, + "step": 5632 + }, + { + "epoch": 0.5506452874462261, + "loss": 0.08885957300662994, + "loss_ce": 0.004646311514079571, + "loss_iou": 0.359375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 546152468, + "step": 5632 + }, + { + "epoch": 0.5507430582714118, + "grad_norm": 14.681935384136292, + "learning_rate": 5e-05, + "loss": 0.0913, + "num_input_tokens_seen": 546249732, + "step": 5633 + }, + { + "epoch": 0.5507430582714118, + "loss": 0.11714790016412735, + "loss_ce": 0.00458381325006485, + "loss_iou": 0.298828125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 546249732, + "step": 5633 + }, + { + "epoch": 0.5508408290965976, + "grad_norm": 15.804827875183932, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 546346984, + "step": 5634 + }, + { + "epoch": 0.5508408290965976, + "loss": 0.1075434461236, + "loss_ce": 0.0044092959724366665, + "loss_iou": 0.330078125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 546346984, + "step": 5634 + }, + { + "epoch": 0.5509385999217833, + "grad_norm": 8.623592000151069, + "learning_rate": 5e-05, + "loss": 0.1128, + "num_input_tokens_seen": 546442920, + "step": 5635 + }, + { + "epoch": 0.5509385999217833, + "loss": 0.11838807910680771, + "loss_ce": 0.011164565570652485, + "loss_iou": 0.2197265625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 546442920, + "step": 5635 + }, + { + "epoch": 0.5510363707469691, + "grad_norm": 3.2141741380798328, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 546539660, + "step": 5636 + }, + { + "epoch": 0.5510363707469691, + "loss": 0.030504614114761353, + "loss_ce": 0.0025962875224649906, + "loss_iou": 0.251953125, + "loss_num": 0.005584716796875, + "loss_xval": 0.0279541015625, + "num_input_tokens_seen": 546539660, + "step": 5636 + }, + { + "epoch": 0.5511341415721549, + "grad_norm": 8.435788175185754, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 546637860, + "step": 5637 + }, + { + "epoch": 0.5511341415721549, + "loss": 0.10199578106403351, + "loss_ce": 0.0033782359678298235, + "loss_iou": 0.2734375, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 546637860, + "step": 5637 + }, + { + "epoch": 0.5512319123973406, + "grad_norm": 6.85783240905253, + "learning_rate": 5e-05, + "loss": 0.1169, + "num_input_tokens_seen": 546734392, + "step": 5638 + }, + { + "epoch": 0.5512319123973406, + "loss": 0.1646471470594406, + "loss_ce": 0.00690179318189621, + "loss_iou": 0.216796875, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 546734392, + "step": 5638 + }, + { + "epoch": 0.5513296832225264, + "grad_norm": 3.390693383792059, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 546831180, + "step": 5639 + }, + { + "epoch": 0.5513296832225264, + "loss": 0.09115351736545563, + "loss_ce": 0.008580584079027176, + "loss_iou": 0.2734375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 546831180, + "step": 5639 + }, + { + "epoch": 0.5514274540477122, + "grad_norm": 3.6305927312987385, + "learning_rate": 5e-05, + "loss": 0.1134, + "num_input_tokens_seen": 546927592, + "step": 5640 + }, + { + "epoch": 0.5514274540477122, + "loss": 0.07735728472471237, + "loss_ce": 0.006449690088629723, + "loss_iou": 0.2734375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 546927592, + "step": 5640 + }, + { + "epoch": 0.5515252248728979, + "grad_norm": 3.252315436303461, + "learning_rate": 5e-05, + "loss": 0.0596, + "num_input_tokens_seen": 547025152, + "step": 5641 + }, + { + "epoch": 0.5515252248728979, + "loss": 0.07925048470497131, + "loss_ce": 0.007213744334876537, + "loss_iou": 0.3125, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 547025152, + "step": 5641 + }, + { + "epoch": 0.5516229956980837, + "grad_norm": 15.98616510950235, + "learning_rate": 5e-05, + "loss": 0.1008, + "num_input_tokens_seen": 547122096, + "step": 5642 + }, + { + "epoch": 0.5516229956980837, + "loss": 0.08951526880264282, + "loss_ce": 0.0030589732341468334, + "loss_iou": 0.3671875, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 547122096, + "step": 5642 + }, + { + "epoch": 0.5517207665232694, + "grad_norm": 6.727178301762375, + "learning_rate": 5e-05, + "loss": 0.0413, + "num_input_tokens_seen": 547219248, + "step": 5643 + }, + { + "epoch": 0.5517207665232694, + "loss": 0.03410158306360245, + "loss_ce": 0.0032387757673859596, + "loss_iou": 0.296875, + "loss_num": 0.00616455078125, + "loss_xval": 0.0308837890625, + "num_input_tokens_seen": 547219248, + "step": 5643 + }, + { + "epoch": 0.5518185373484552, + "grad_norm": 6.442909820872248, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 547316896, + "step": 5644 + }, + { + "epoch": 0.5518185373484552, + "loss": 0.07943494617938995, + "loss_ce": 0.004315932281315327, + "loss_iou": 0.283203125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 547316896, + "step": 5644 + }, + { + "epoch": 0.551916308173641, + "grad_norm": 11.754016707347793, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 547414404, + "step": 5645 + }, + { + "epoch": 0.551916308173641, + "loss": 0.11421814560890198, + "loss_ce": 0.008688366040587425, + "loss_iou": 0.390625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 547414404, + "step": 5645 + }, + { + "epoch": 0.5520140789988267, + "grad_norm": 35.91711877872331, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 547511948, + "step": 5646 + }, + { + "epoch": 0.5520140789988267, + "loss": 0.09479954838752747, + "loss_ce": 0.006451155059039593, + "loss_iou": 0.349609375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 547511948, + "step": 5646 + }, + { + "epoch": 0.5521118498240125, + "grad_norm": 20.50767407005179, + "learning_rate": 5e-05, + "loss": 0.1052, + "num_input_tokens_seen": 547609124, + "step": 5647 + }, + { + "epoch": 0.5521118498240125, + "loss": 0.12285618484020233, + "loss_ce": 0.005531355272978544, + "loss_iou": 0.361328125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 547609124, + "step": 5647 + }, + { + "epoch": 0.5522096206491983, + "grad_norm": 15.677681264908248, + "learning_rate": 5e-05, + "loss": 0.1002, + "num_input_tokens_seen": 547705896, + "step": 5648 + }, + { + "epoch": 0.5522096206491983, + "loss": 0.09453515708446503, + "loss_ce": 0.007087031379342079, + "loss_iou": 0.1962890625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 547705896, + "step": 5648 + }, + { + "epoch": 0.552307391474384, + "grad_norm": 12.066460921912721, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 547803012, + "step": 5649 + }, + { + "epoch": 0.552307391474384, + "loss": 0.07663261890411377, + "loss_ce": 0.00522529985755682, + "loss_iou": 0.287109375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 547803012, + "step": 5649 + }, + { + "epoch": 0.5524051622995698, + "grad_norm": 10.219269679277907, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 547900088, + "step": 5650 + }, + { + "epoch": 0.5524051622995698, + "loss": 0.062111999839544296, + "loss_ce": 0.007714417763054371, + "loss_iou": 0.326171875, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 547900088, + "step": 5650 + }, + { + "epoch": 0.5525029331247556, + "grad_norm": 14.035866574191246, + "learning_rate": 5e-05, + "loss": 0.0988, + "num_input_tokens_seen": 547996580, + "step": 5651 + }, + { + "epoch": 0.5525029331247556, + "loss": 0.09140682220458984, + "loss_ce": 0.006323812995105982, + "loss_iou": 0.30859375, + "loss_num": 0.01708984375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 547996580, + "step": 5651 + }, + { + "epoch": 0.5526007039499413, + "grad_norm": 25.55407365972745, + "learning_rate": 5e-05, + "loss": 0.0873, + "num_input_tokens_seen": 548092536, + "step": 5652 + }, + { + "epoch": 0.5526007039499413, + "loss": 0.06974343955516815, + "loss_ce": 0.004634183831512928, + "loss_iou": 0.24609375, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 548092536, + "step": 5652 + }, + { + "epoch": 0.5526984747751271, + "grad_norm": 5.989637529122565, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 548189272, + "step": 5653 + }, + { + "epoch": 0.5526984747751271, + "loss": 0.06678108870983124, + "loss_ce": 0.002213515341281891, + "loss_iou": 0.2734375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 548189272, + "step": 5653 + }, + { + "epoch": 0.5527962456003128, + "grad_norm": 6.616542242082278, + "learning_rate": 5e-05, + "loss": 0.0613, + "num_input_tokens_seen": 548285420, + "step": 5654 + }, + { + "epoch": 0.5527962456003128, + "loss": 0.06160404533147812, + "loss_ce": 0.0047421688213944435, + "loss_iou": 0.2333984375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 548285420, + "step": 5654 + }, + { + "epoch": 0.5528940164254986, + "grad_norm": 5.139868612584423, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 548383032, + "step": 5655 + }, + { + "epoch": 0.5528940164254986, + "loss": 0.049791593104600906, + "loss_ce": 0.0037787146866321564, + "loss_iou": 0.326171875, + "loss_num": 0.00921630859375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 548383032, + "step": 5655 + }, + { + "epoch": 0.5529917872506844, + "grad_norm": 12.579756920841506, + "learning_rate": 5e-05, + "loss": 0.1016, + "num_input_tokens_seen": 548478572, + "step": 5656 + }, + { + "epoch": 0.5529917872506844, + "loss": 0.09929943084716797, + "loss_ce": 0.0035352695267647505, + "loss_iou": 0.240234375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 548478572, + "step": 5656 + }, + { + "epoch": 0.5530895580758701, + "grad_norm": 9.43195456357669, + "learning_rate": 5e-05, + "loss": 0.1025, + "num_input_tokens_seen": 548576108, + "step": 5657 + }, + { + "epoch": 0.5530895580758701, + "loss": 0.09452405571937561, + "loss_ce": 0.006282481364905834, + "loss_iou": 0.33984375, + "loss_num": 0.017578125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 548576108, + "step": 5657 + }, + { + "epoch": 0.5531873289010559, + "grad_norm": 5.0996005848244605, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 548673644, + "step": 5658 + }, + { + "epoch": 0.5531873289010559, + "loss": 0.07055296748876572, + "loss_ce": 0.003322744742035866, + "loss_iou": 0.443359375, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 548673644, + "step": 5658 + }, + { + "epoch": 0.5532850997262417, + "grad_norm": 5.260716251954348, + "learning_rate": 5e-05, + "loss": 0.1117, + "num_input_tokens_seen": 548770568, + "step": 5659 + }, + { + "epoch": 0.5532850997262417, + "loss": 0.13262158632278442, + "loss_ce": 0.0027998052537441254, + "loss_iou": 0.42578125, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 548770568, + "step": 5659 + }, + { + "epoch": 0.5533828705514274, + "grad_norm": 4.27085233054512, + "learning_rate": 5e-05, + "loss": 0.071, + "num_input_tokens_seen": 548867624, + "step": 5660 + }, + { + "epoch": 0.5533828705514274, + "loss": 0.07283160090446472, + "loss_ce": 0.0030379053205251694, + "loss_iou": 0.396484375, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 548867624, + "step": 5660 + }, + { + "epoch": 0.5534806413766132, + "grad_norm": 6.054320157831065, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 548965024, + "step": 5661 + }, + { + "epoch": 0.5534806413766132, + "loss": 0.045813240110874176, + "loss_ce": 0.003988898359239101, + "loss_iou": 0.255859375, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 548965024, + "step": 5661 + }, + { + "epoch": 0.5535784122017989, + "grad_norm": 9.49608424439039, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 549060992, + "step": 5662 + }, + { + "epoch": 0.5535784122017989, + "loss": 0.089475117623806, + "loss_ce": 0.004422629252076149, + "loss_iou": 0.2734375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 549060992, + "step": 5662 + }, + { + "epoch": 0.5536761830269847, + "grad_norm": 5.379849103053423, + "learning_rate": 5e-05, + "loss": 0.0847, + "num_input_tokens_seen": 549158712, + "step": 5663 + }, + { + "epoch": 0.5536761830269847, + "loss": 0.05363166332244873, + "loss_ce": 0.009297255426645279, + "loss_iou": 0.251953125, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 549158712, + "step": 5663 + }, + { + "epoch": 0.5537739538521705, + "grad_norm": 4.0727855823571195, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 549255848, + "step": 5664 + }, + { + "epoch": 0.5537739538521705, + "loss": 0.07439873367547989, + "loss_ce": 0.004894949030131102, + "loss_iou": 0.2216796875, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 549255848, + "step": 5664 + }, + { + "epoch": 0.5538717246773562, + "grad_norm": 5.95820996669408, + "learning_rate": 5e-05, + "loss": 0.0651, + "num_input_tokens_seen": 549352564, + "step": 5665 + }, + { + "epoch": 0.5538717246773562, + "loss": 0.037458524107933044, + "loss_ce": 0.002348050009459257, + "loss_iou": 0.318359375, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 549352564, + "step": 5665 + }, + { + "epoch": 0.553969495502542, + "grad_norm": 17.773388353134106, + "learning_rate": 5e-05, + "loss": 0.1095, + "num_input_tokens_seen": 549449156, + "step": 5666 + }, + { + "epoch": 0.553969495502542, + "loss": 0.12075565755367279, + "loss_ce": 0.007993210107088089, + "loss_iou": 0.2890625, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 549449156, + "step": 5666 + }, + { + "epoch": 0.5540672663277278, + "grad_norm": 7.787653967102125, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 549545136, + "step": 5667 + }, + { + "epoch": 0.5540672663277278, + "loss": 0.0978752076625824, + "loss_ce": 0.007497407030314207, + "loss_iou": 0.251953125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 549545136, + "step": 5667 + }, + { + "epoch": 0.5541650371529135, + "grad_norm": 7.49366581363356, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 549642916, + "step": 5668 + }, + { + "epoch": 0.5541650371529135, + "loss": 0.07504571974277496, + "loss_ce": 0.003604062832891941, + "loss_iou": 0.390625, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 549642916, + "step": 5668 + }, + { + "epoch": 0.5542628079780993, + "grad_norm": 4.404856100171287, + "learning_rate": 5e-05, + "loss": 0.0604, + "num_input_tokens_seen": 549740976, + "step": 5669 + }, + { + "epoch": 0.5542628079780993, + "loss": 0.062302231788635254, + "loss_ce": 0.007103567942976952, + "loss_iou": 0.30859375, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 549740976, + "step": 5669 + }, + { + "epoch": 0.5543605788032852, + "grad_norm": 21.55868995348269, + "learning_rate": 5e-05, + "loss": 0.0979, + "num_input_tokens_seen": 549838464, + "step": 5670 + }, + { + "epoch": 0.5543605788032852, + "loss": 0.10212180018424988, + "loss_ce": 0.006487337406724691, + "loss_iou": 0.35546875, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 549838464, + "step": 5670 + }, + { + "epoch": 0.5544583496284708, + "grad_norm": 20.979793784660526, + "learning_rate": 5e-05, + "loss": 0.0769, + "num_input_tokens_seen": 549935232, + "step": 5671 + }, + { + "epoch": 0.5544583496284708, + "loss": 0.103386789560318, + "loss_ce": 0.010086926631629467, + "loss_iou": 0.271484375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 549935232, + "step": 5671 + }, + { + "epoch": 0.5545561204536567, + "grad_norm": 9.992339369522055, + "learning_rate": 5e-05, + "loss": 0.1048, + "num_input_tokens_seen": 550032572, + "step": 5672 + }, + { + "epoch": 0.5545561204536567, + "loss": 0.08130217343568802, + "loss_ce": 0.003924854099750519, + "loss_iou": 0.296875, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 550032572, + "step": 5672 + }, + { + "epoch": 0.5546538912788423, + "grad_norm": 2.989608424809447, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 550129340, + "step": 5673 + }, + { + "epoch": 0.5546538912788423, + "loss": 0.07928379625082016, + "loss_ce": 0.007979475893080235, + "loss_iou": 0.283203125, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 550129340, + "step": 5673 + }, + { + "epoch": 0.5547516621040282, + "grad_norm": 19.463261886298127, + "learning_rate": 5e-05, + "loss": 0.1045, + "num_input_tokens_seen": 550226820, + "step": 5674 + }, + { + "epoch": 0.5547516621040282, + "loss": 0.10127612948417664, + "loss_ce": 0.00708362041041255, + "loss_iou": 0.30859375, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 550226820, + "step": 5674 + }, + { + "epoch": 0.554849432929214, + "grad_norm": 5.523697200147971, + "learning_rate": 5e-05, + "loss": 0.0917, + "num_input_tokens_seen": 550324032, + "step": 5675 + }, + { + "epoch": 0.554849432929214, + "loss": 0.03610817342996597, + "loss_ce": 0.0029355683363974094, + "loss_iou": 0.2451171875, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 550324032, + "step": 5675 + }, + { + "epoch": 0.5549472037543997, + "grad_norm": 29.923327872994193, + "learning_rate": 5e-05, + "loss": 0.1083, + "num_input_tokens_seen": 550421660, + "step": 5676 + }, + { + "epoch": 0.5549472037543997, + "loss": 0.12398983538150787, + "loss_ce": 0.007580523379147053, + "loss_iou": 0.306640625, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 550421660, + "step": 5676 + }, + { + "epoch": 0.5550449745795855, + "grad_norm": 8.615738401594738, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 550518960, + "step": 5677 + }, + { + "epoch": 0.5550449745795855, + "loss": 0.10227436572313309, + "loss_ce": 0.003839916782453656, + "loss_iou": 0.37109375, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 550518960, + "step": 5677 + }, + { + "epoch": 0.5551427454047713, + "grad_norm": 11.3462826746444, + "learning_rate": 5e-05, + "loss": 0.1086, + "num_input_tokens_seen": 550616648, + "step": 5678 + }, + { + "epoch": 0.5551427454047713, + "loss": 0.09065508842468262, + "loss_ce": 0.0030391206964850426, + "loss_iou": 0.349609375, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 550616648, + "step": 5678 + }, + { + "epoch": 0.555240516229957, + "grad_norm": 2.4717893141625864, + "learning_rate": 5e-05, + "loss": 0.0963, + "num_input_tokens_seen": 550713024, + "step": 5679 + }, + { + "epoch": 0.555240516229957, + "loss": 0.0823492705821991, + "loss_ce": 0.005788302980363369, + "loss_iou": 0.2216796875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 550713024, + "step": 5679 + }, + { + "epoch": 0.5553382870551428, + "grad_norm": 4.089774177884603, + "learning_rate": 5e-05, + "loss": 0.1031, + "num_input_tokens_seen": 550809864, + "step": 5680 + }, + { + "epoch": 0.5553382870551428, + "loss": 0.1261957287788391, + "loss_ce": 0.004339044913649559, + "loss_iou": 0.248046875, + "loss_num": 0.0242919921875, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 550809864, + "step": 5680 + }, + { + "epoch": 0.5554360578803285, + "grad_norm": 3.520586860883465, + "learning_rate": 5e-05, + "loss": 0.0763, + "num_input_tokens_seen": 550908144, + "step": 5681 + }, + { + "epoch": 0.5554360578803285, + "loss": 0.07089294493198395, + "loss_ce": 0.0017248468939214945, + "loss_iou": 0.3515625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 550908144, + "step": 5681 + }, + { + "epoch": 0.5555338287055143, + "grad_norm": 16.73879405471208, + "learning_rate": 5e-05, + "loss": 0.1084, + "num_input_tokens_seen": 551004728, + "step": 5682 + }, + { + "epoch": 0.5555338287055143, + "loss": 0.08519487082958221, + "loss_ce": 0.00557450857013464, + "loss_iou": 0.33984375, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 551004728, + "step": 5682 + }, + { + "epoch": 0.5556315995307001, + "grad_norm": 12.004023849841417, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 551101868, + "step": 5683 + }, + { + "epoch": 0.5556315995307001, + "loss": 0.06487676501274109, + "loss_ce": 0.0027887544129043818, + "loss_iou": 0.31640625, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 551101868, + "step": 5683 + }, + { + "epoch": 0.5557293703558858, + "grad_norm": 6.395825325911499, + "learning_rate": 5e-05, + "loss": 0.0806, + "num_input_tokens_seen": 551197860, + "step": 5684 + }, + { + "epoch": 0.5557293703558858, + "loss": 0.10367921739816666, + "loss_ce": 0.007686170749366283, + "loss_iou": 0.333984375, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 551197860, + "step": 5684 + }, + { + "epoch": 0.5558271411810716, + "grad_norm": 7.437904525130213, + "learning_rate": 5e-05, + "loss": 0.0973, + "num_input_tokens_seen": 551294396, + "step": 5685 + }, + { + "epoch": 0.5558271411810716, + "loss": 0.07531998306512833, + "loss_ce": 0.007433630991727114, + "loss_iou": 0.1953125, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 551294396, + "step": 5685 + }, + { + "epoch": 0.5559249120062574, + "grad_norm": 2.3220049131725267, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 551391552, + "step": 5686 + }, + { + "epoch": 0.5559249120062574, + "loss": 0.058773770928382874, + "loss_ce": 0.003453032812103629, + "loss_iou": 0.263671875, + "loss_num": 0.01104736328125, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 551391552, + "step": 5686 + }, + { + "epoch": 0.5560226828314431, + "grad_norm": 7.34575516789826, + "learning_rate": 5e-05, + "loss": 0.0931, + "num_input_tokens_seen": 551488388, + "step": 5687 + }, + { + "epoch": 0.5560226828314431, + "loss": 0.1207282766699791, + "loss_ce": 0.009888437576591969, + "loss_iou": 0.33203125, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 551488388, + "step": 5687 + }, + { + "epoch": 0.5561204536566289, + "grad_norm": 25.102714818644955, + "learning_rate": 5e-05, + "loss": 0.0518, + "num_input_tokens_seen": 551585732, + "step": 5688 + }, + { + "epoch": 0.5561204536566289, + "loss": 0.03823439031839371, + "loss_ce": 0.003978407941758633, + "loss_iou": 0.330078125, + "loss_num": 0.006866455078125, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 551585732, + "step": 5688 + }, + { + "epoch": 0.5562182244818146, + "grad_norm": 8.62991316574205, + "learning_rate": 5e-05, + "loss": 0.0854, + "num_input_tokens_seen": 551681952, + "step": 5689 + }, + { + "epoch": 0.5562182244818146, + "loss": 0.10548955202102661, + "loss_ce": 0.005422414280474186, + "loss_iou": 0.306640625, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 551681952, + "step": 5689 + }, + { + "epoch": 0.5563159953070004, + "grad_norm": 5.922115137421701, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 551778940, + "step": 5690 + }, + { + "epoch": 0.5563159953070004, + "loss": 0.05374451354146004, + "loss_ce": 0.004336557351052761, + "loss_iou": 0.205078125, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 551778940, + "step": 5690 + }, + { + "epoch": 0.5564137661321862, + "grad_norm": 7.549187785396689, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 551875160, + "step": 5691 + }, + { + "epoch": 0.5564137661321862, + "loss": 0.09222599864006042, + "loss_ce": 0.008119558915495872, + "loss_iou": 0.279296875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 551875160, + "step": 5691 + }, + { + "epoch": 0.5565115369573719, + "grad_norm": 11.807905258575959, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 551972624, + "step": 5692 + }, + { + "epoch": 0.5565115369573719, + "loss": 0.06171731278300285, + "loss_ce": 0.005748072173446417, + "loss_iou": 0.33984375, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 551972624, + "step": 5692 + }, + { + "epoch": 0.5566093077825577, + "grad_norm": 4.291645729810135, + "learning_rate": 5e-05, + "loss": 0.0492, + "num_input_tokens_seen": 552070356, + "step": 5693 + }, + { + "epoch": 0.5566093077825577, + "loss": 0.05650172010064125, + "loss_ce": 0.0049422746524214745, + "loss_iou": 0.431640625, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 552070356, + "step": 5693 + }, + { + "epoch": 0.5567070786077435, + "grad_norm": 4.794579553780003, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 552167640, + "step": 5694 + }, + { + "epoch": 0.5567070786077435, + "loss": 0.1104840412735939, + "loss_ce": 0.0068692369386553764, + "loss_iou": 0.388671875, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 552167640, + "step": 5694 + }, + { + "epoch": 0.5568048494329292, + "grad_norm": 14.634560392568972, + "learning_rate": 5e-05, + "loss": 0.0877, + "num_input_tokens_seen": 552265092, + "step": 5695 + }, + { + "epoch": 0.5568048494329292, + "loss": 0.06522023677825928, + "loss_ce": 0.0026897182688117027, + "loss_iou": 0.462890625, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 552265092, + "step": 5695 + }, + { + "epoch": 0.556902620258115, + "grad_norm": 33.70283651356515, + "learning_rate": 5e-05, + "loss": 0.1025, + "num_input_tokens_seen": 552362108, + "step": 5696 + }, + { + "epoch": 0.556902620258115, + "loss": 0.12401740252971649, + "loss_ce": 0.004388498142361641, + "loss_iou": 0.3359375, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 552362108, + "step": 5696 + }, + { + "epoch": 0.5570003910833008, + "grad_norm": 13.1449683629982, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 552458884, + "step": 5697 + }, + { + "epoch": 0.5570003910833008, + "loss": 0.0334104523062706, + "loss_ce": 0.005719564855098724, + "loss_iou": 0.3359375, + "loss_num": 0.00555419921875, + "loss_xval": 0.0277099609375, + "num_input_tokens_seen": 552458884, + "step": 5697 + }, + { + "epoch": 0.5570981619084865, + "grad_norm": 9.291986804888012, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 552554308, + "step": 5698 + }, + { + "epoch": 0.5570981619084865, + "loss": 0.05713259428739548, + "loss_ce": 0.005905022844672203, + "loss_iou": 0.267578125, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 552554308, + "step": 5698 + }, + { + "epoch": 0.5571959327336723, + "grad_norm": 77.30272310366634, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 552651232, + "step": 5699 + }, + { + "epoch": 0.5571959327336723, + "loss": 0.062041714787483215, + "loss_ce": 0.0036768466234207153, + "loss_iou": 0.27734375, + "loss_num": 0.01171875, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 552651232, + "step": 5699 + }, + { + "epoch": 0.557293703558858, + "grad_norm": 16.491447418924906, + "learning_rate": 5e-05, + "loss": 0.0971, + "num_input_tokens_seen": 552748016, + "step": 5700 + }, + { + "epoch": 0.557293703558858, + "loss": 0.1037847250699997, + "loss_ce": 0.004144836217164993, + "loss_iou": 0.302734375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 552748016, + "step": 5700 + }, + { + "epoch": 0.5573914743840438, + "grad_norm": 3.8927936410884074, + "learning_rate": 5e-05, + "loss": 0.0688, + "num_input_tokens_seen": 552844396, + "step": 5701 + }, + { + "epoch": 0.5573914743840438, + "loss": 0.078492671251297, + "loss_ce": 0.008058100007474422, + "loss_iou": 0.29296875, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 552844396, + "step": 5701 + }, + { + "epoch": 0.5574892452092296, + "grad_norm": 6.672335604662788, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 552941484, + "step": 5702 + }, + { + "epoch": 0.5574892452092296, + "loss": 0.08159296214580536, + "loss_ce": 0.006931701675057411, + "loss_iou": 0.208984375, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 552941484, + "step": 5702 + }, + { + "epoch": 0.5575870160344153, + "grad_norm": 7.668524953915041, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 553038136, + "step": 5703 + }, + { + "epoch": 0.5575870160344153, + "loss": 0.07154301553964615, + "loss_ce": 0.0064337607473134995, + "loss_iou": 0.306640625, + "loss_num": 0.0130615234375, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 553038136, + "step": 5703 + }, + { + "epoch": 0.5576847868596011, + "grad_norm": 12.023826293062385, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 553135056, + "step": 5704 + }, + { + "epoch": 0.5576847868596011, + "loss": 0.10051265358924866, + "loss_ce": 0.008395344018936157, + "loss_iou": 0.357421875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 553135056, + "step": 5704 + }, + { + "epoch": 0.5577825576847869, + "grad_norm": 4.857302903055015, + "learning_rate": 5e-05, + "loss": 0.0906, + "num_input_tokens_seen": 553232076, + "step": 5705 + }, + { + "epoch": 0.5577825576847869, + "loss": 0.1008048951625824, + "loss_ce": 0.010106655769050121, + "loss_iou": 0.306640625, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 553232076, + "step": 5705 + }, + { + "epoch": 0.5578803285099726, + "grad_norm": 6.352253441274154, + "learning_rate": 5e-05, + "loss": 0.0519, + "num_input_tokens_seen": 553329536, + "step": 5706 + }, + { + "epoch": 0.5578803285099726, + "loss": 0.06007704138755798, + "loss_ce": 0.004397716373205185, + "loss_iou": 0.283203125, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 553329536, + "step": 5706 + }, + { + "epoch": 0.5579780993351584, + "grad_norm": 8.257997328054097, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 553425552, + "step": 5707 + }, + { + "epoch": 0.5579780993351584, + "loss": 0.052166640758514404, + "loss_ce": 0.003964128438383341, + "loss_iou": 0.376953125, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 553425552, + "step": 5707 + }, + { + "epoch": 0.5580758701603441, + "grad_norm": 9.21732654626646, + "learning_rate": 5e-05, + "loss": 0.0863, + "num_input_tokens_seen": 553522708, + "step": 5708 + }, + { + "epoch": 0.5580758701603441, + "loss": 0.09106964617967606, + "loss_ce": 0.005849306471645832, + "loss_iou": 0.263671875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 553522708, + "step": 5708 + }, + { + "epoch": 0.5581736409855299, + "grad_norm": 7.788058196449587, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 553618924, + "step": 5709 + }, + { + "epoch": 0.5581736409855299, + "loss": 0.09259536117315292, + "loss_ce": 0.006680750288069248, + "loss_iou": 0.384765625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 553618924, + "step": 5709 + }, + { + "epoch": 0.5582714118107157, + "grad_norm": 10.011538125573997, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 553715572, + "step": 5710 + }, + { + "epoch": 0.5582714118107157, + "loss": 0.08223333954811096, + "loss_ce": 0.004352477379143238, + "loss_iou": 0.3203125, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 553715572, + "step": 5710 + }, + { + "epoch": 0.5583691826359014, + "grad_norm": 12.04690129136731, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 553812376, + "step": 5711 + }, + { + "epoch": 0.5583691826359014, + "loss": 0.07244640588760376, + "loss_ce": 0.006749691441655159, + "loss_iou": 0.34765625, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 553812376, + "step": 5711 + }, + { + "epoch": 0.5584669534610872, + "grad_norm": 13.028301982189536, + "learning_rate": 5e-05, + "loss": 0.0952, + "num_input_tokens_seen": 553910108, + "step": 5712 + }, + { + "epoch": 0.5584669534610872, + "loss": 0.08605159819126129, + "loss_ce": 0.004874837584793568, + "loss_iou": 0.373046875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 553910108, + "step": 5712 + }, + { + "epoch": 0.558564724286273, + "grad_norm": 5.6404661299488215, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 554008176, + "step": 5713 + }, + { + "epoch": 0.558564724286273, + "loss": 0.03965390473604202, + "loss_ce": 0.0032769511453807354, + "loss_iou": 0.40625, + "loss_num": 0.00726318359375, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 554008176, + "step": 5713 + }, + { + "epoch": 0.5586624951114587, + "grad_norm": 6.151858382640131, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 554105288, + "step": 5714 + }, + { + "epoch": 0.5586624951114587, + "loss": 0.10155031085014343, + "loss_ce": 0.011172506026923656, + "loss_iou": 0.34765625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 554105288, + "step": 5714 + }, + { + "epoch": 0.5587602659366445, + "grad_norm": 8.173592044045106, + "learning_rate": 5e-05, + "loss": 0.0905, + "num_input_tokens_seen": 554202108, + "step": 5715 + }, + { + "epoch": 0.5587602659366445, + "loss": 0.11749309301376343, + "loss_ce": 0.006264153402298689, + "loss_iou": 0.208984375, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 554202108, + "step": 5715 + }, + { + "epoch": 0.5588580367618303, + "grad_norm": 7.117042026294251, + "learning_rate": 5e-05, + "loss": 0.0681, + "num_input_tokens_seen": 554299408, + "step": 5716 + }, + { + "epoch": 0.5588580367618303, + "loss": 0.07637974619865417, + "loss_ce": 0.004510846920311451, + "loss_iou": 0.287109375, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 554299408, + "step": 5716 + }, + { + "epoch": 0.558955807587016, + "grad_norm": 11.738165111496018, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 554396084, + "step": 5717 + }, + { + "epoch": 0.558955807587016, + "loss": 0.08271631598472595, + "loss_ce": 0.00213846517726779, + "loss_iou": 0.30859375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 554396084, + "step": 5717 + }, + { + "epoch": 0.5590535784122018, + "grad_norm": 30.445880492907218, + "learning_rate": 5e-05, + "loss": 0.1028, + "num_input_tokens_seen": 554492988, + "step": 5718 + }, + { + "epoch": 0.5590535784122018, + "loss": 0.11532802879810333, + "loss_ce": 0.008470738306641579, + "loss_iou": 0.22265625, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 554492988, + "step": 5718 + }, + { + "epoch": 0.5591513492373875, + "grad_norm": 16.03887690336624, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 554590388, + "step": 5719 + }, + { + "epoch": 0.5591513492373875, + "loss": 0.07178729772567749, + "loss_ce": 0.00474017346277833, + "loss_iou": 0.380859375, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 554590388, + "step": 5719 + }, + { + "epoch": 0.5592491200625733, + "grad_norm": 16.62269281044761, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 554686764, + "step": 5720 + }, + { + "epoch": 0.5592491200625733, + "loss": 0.07226496934890747, + "loss_ce": 0.004401497542858124, + "loss_iou": 0.1923828125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 554686764, + "step": 5720 + }, + { + "epoch": 0.5593468908877591, + "grad_norm": 15.452952692368559, + "learning_rate": 5e-05, + "loss": 0.077, + "num_input_tokens_seen": 554783760, + "step": 5721 + }, + { + "epoch": 0.5593468908877591, + "loss": 0.07313543558120728, + "loss_ce": 0.00340277305804193, + "loss_iou": 0.2451171875, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 554783760, + "step": 5721 + }, + { + "epoch": 0.5594446617129448, + "grad_norm": 11.497144960445452, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 554880692, + "step": 5722 + }, + { + "epoch": 0.5594446617129448, + "loss": 0.04751111567020416, + "loss_ce": 0.005740180611610413, + "loss_iou": 0.283203125, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 554880692, + "step": 5722 + }, + { + "epoch": 0.5595424325381306, + "grad_norm": 14.297076869686997, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 554977224, + "step": 5723 + }, + { + "epoch": 0.5595424325381306, + "loss": 0.07688792049884796, + "loss_ce": 0.006941631436347961, + "loss_iou": 0.306640625, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 554977224, + "step": 5723 + }, + { + "epoch": 0.5596402033633164, + "grad_norm": 12.77763718529994, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 555073944, + "step": 5724 + }, + { + "epoch": 0.5596402033633164, + "loss": 0.0681128203868866, + "loss_ce": 0.004727814346551895, + "loss_iou": 0.2578125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 555073944, + "step": 5724 + }, + { + "epoch": 0.5597379741885021, + "grad_norm": 5.870426914343215, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 555171020, + "step": 5725 + }, + { + "epoch": 0.5597379741885021, + "loss": 0.0846334844827652, + "loss_ce": 0.006767886690795422, + "loss_iou": 0.306640625, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 555171020, + "step": 5725 + }, + { + "epoch": 0.5598357450136879, + "grad_norm": 3.749410196570151, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 555267664, + "step": 5726 + }, + { + "epoch": 0.5598357450136879, + "loss": 0.05477733165025711, + "loss_ce": 0.004308886826038361, + "loss_iou": 0.220703125, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 555267664, + "step": 5726 + }, + { + "epoch": 0.5599335158388736, + "grad_norm": 7.985363890218149, + "learning_rate": 5e-05, + "loss": 0.0851, + "num_input_tokens_seen": 555364860, + "step": 5727 + }, + { + "epoch": 0.5599335158388736, + "loss": 0.09629662334918976, + "loss_ce": 0.008024530485272408, + "loss_iou": 0.2392578125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 555364860, + "step": 5727 + }, + { + "epoch": 0.5600312866640594, + "grad_norm": 14.923052893019243, + "learning_rate": 5e-05, + "loss": 0.0937, + "num_input_tokens_seen": 555461520, + "step": 5728 + }, + { + "epoch": 0.5600312866640594, + "loss": 0.09445631504058838, + "loss_ce": 0.003437635488808155, + "loss_iou": 0.341796875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 555461520, + "step": 5728 + }, + { + "epoch": 0.5601290574892452, + "grad_norm": 29.06093191691064, + "learning_rate": 5e-05, + "loss": 0.0983, + "num_input_tokens_seen": 555558236, + "step": 5729 + }, + { + "epoch": 0.5601290574892452, + "loss": 0.08258335292339325, + "loss_ce": 0.005633275490254164, + "loss_iou": 0.28125, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 555558236, + "step": 5729 + }, + { + "epoch": 0.5602268283144309, + "grad_norm": 6.3634916543802085, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 555655708, + "step": 5730 + }, + { + "epoch": 0.5602268283144309, + "loss": 0.057980768382549286, + "loss_ce": 0.005650751758366823, + "loss_iou": 0.3046875, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 555655708, + "step": 5730 + }, + { + "epoch": 0.5603245991396167, + "grad_norm": 4.135693101612936, + "learning_rate": 5e-05, + "loss": 0.0576, + "num_input_tokens_seen": 555752792, + "step": 5731 + }, + { + "epoch": 0.5603245991396167, + "loss": 0.05828738585114479, + "loss_ce": 0.005446200259029865, + "loss_iou": 0.259765625, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 555752792, + "step": 5731 + }, + { + "epoch": 0.5604223699648025, + "grad_norm": 3.6948735202714458, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 555848284, + "step": 5732 + }, + { + "epoch": 0.5604223699648025, + "loss": 0.0884639322757721, + "loss_ce": 0.010193977504968643, + "loss_iou": 0.1904296875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 555848284, + "step": 5732 + }, + { + "epoch": 0.5605201407899882, + "grad_norm": 5.461768930221304, + "learning_rate": 5e-05, + "loss": 0.0566, + "num_input_tokens_seen": 555945304, + "step": 5733 + }, + { + "epoch": 0.5605201407899882, + "loss": 0.06469219923019409, + "loss_ce": 0.008783990517258644, + "loss_iou": 0.279296875, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 555945304, + "step": 5733 + }, + { + "epoch": 0.560617911615174, + "grad_norm": 18.038199169576934, + "learning_rate": 5e-05, + "loss": 0.062, + "num_input_tokens_seen": 556042444, + "step": 5734 + }, + { + "epoch": 0.560617911615174, + "loss": 0.04370519518852234, + "loss_ce": 0.008716791868209839, + "loss_iou": 0.29296875, + "loss_num": 0.00701904296875, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 556042444, + "step": 5734 + }, + { + "epoch": 0.5607156824403597, + "grad_norm": 5.333164883074822, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 556139256, + "step": 5735 + }, + { + "epoch": 0.5607156824403597, + "loss": 0.07114384323358536, + "loss_ce": 0.008155560120940208, + "loss_iou": 0.365234375, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 556139256, + "step": 5735 + }, + { + "epoch": 0.5608134532655455, + "grad_norm": 13.927787982182224, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 556236064, + "step": 5736 + }, + { + "epoch": 0.5608134532655455, + "loss": 0.05834869295358658, + "loss_ce": 0.002921145176514983, + "loss_iou": 0.2265625, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 556236064, + "step": 5736 + }, + { + "epoch": 0.5609112240907314, + "grad_norm": 30.991174598989122, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 556333004, + "step": 5737 + }, + { + "epoch": 0.5609112240907314, + "loss": 0.07282314449548721, + "loss_ce": 0.005722622852772474, + "loss_iou": 0.265625, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 556333004, + "step": 5737 + }, + { + "epoch": 0.561008994915917, + "grad_norm": 43.70840131926283, + "learning_rate": 5e-05, + "loss": 0.1041, + "num_input_tokens_seen": 556430124, + "step": 5738 + }, + { + "epoch": 0.561008994915917, + "loss": 0.09517443925142288, + "loss_ce": 0.005422245245426893, + "loss_iou": 0.375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 556430124, + "step": 5738 + }, + { + "epoch": 0.5611067657411029, + "grad_norm": 12.44562764237594, + "learning_rate": 5e-05, + "loss": 0.0561, + "num_input_tokens_seen": 556526792, + "step": 5739 + }, + { + "epoch": 0.5611067657411029, + "loss": 0.061952635645866394, + "loss_ce": 0.004564330913126469, + "loss_iou": 0.283203125, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 556526792, + "step": 5739 + }, + { + "epoch": 0.5612045365662887, + "grad_norm": 3.602067085549739, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 556623940, + "step": 5740 + }, + { + "epoch": 0.5612045365662887, + "loss": 0.0478401780128479, + "loss_ce": 0.0054360018111765385, + "loss_iou": 0.306640625, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 556623940, + "step": 5740 + }, + { + "epoch": 0.5613023073914744, + "grad_norm": 12.594101974384284, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 556720724, + "step": 5741 + }, + { + "epoch": 0.5613023073914744, + "loss": 0.04550262540578842, + "loss_ce": 0.004782640840858221, + "loss_iou": 0.29296875, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 556720724, + "step": 5741 + }, + { + "epoch": 0.5614000782166602, + "grad_norm": 8.999587846173297, + "learning_rate": 5e-05, + "loss": 0.1016, + "num_input_tokens_seen": 556818676, + "step": 5742 + }, + { + "epoch": 0.5614000782166602, + "loss": 0.10437871515750885, + "loss_ce": 0.0037927855737507343, + "loss_iou": 0.32421875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 556818676, + "step": 5742 + }, + { + "epoch": 0.561497849041846, + "grad_norm": 14.385112689677337, + "learning_rate": 5e-05, + "loss": 0.1021, + "num_input_tokens_seen": 556915924, + "step": 5743 + }, + { + "epoch": 0.561497849041846, + "loss": 0.12183759361505508, + "loss_ce": 0.003631575033068657, + "loss_iou": 0.31640625, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 556915924, + "step": 5743 + }, + { + "epoch": 0.5615956198670317, + "grad_norm": 7.484588209065056, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 557013316, + "step": 5744 + }, + { + "epoch": 0.5615956198670317, + "loss": 0.07569495588541031, + "loss_ce": 0.005687633529305458, + "loss_iou": 0.32421875, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 557013316, + "step": 5744 + }, + { + "epoch": 0.5616933906922175, + "grad_norm": 3.830005760366718, + "learning_rate": 5e-05, + "loss": 0.0656, + "num_input_tokens_seen": 557111392, + "step": 5745 + }, + { + "epoch": 0.5616933906922175, + "loss": 0.0630280077457428, + "loss_ce": 0.009584097191691399, + "loss_iou": 0.390625, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 557111392, + "step": 5745 + }, + { + "epoch": 0.5617911615174032, + "grad_norm": 3.3688696321287095, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 557209628, + "step": 5746 + }, + { + "epoch": 0.5617911615174032, + "loss": 0.061551809310913086, + "loss_ce": 0.0029275419656187296, + "loss_iou": 0.34765625, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 557209628, + "step": 5746 + }, + { + "epoch": 0.561888932342589, + "grad_norm": 10.692301857766758, + "learning_rate": 5e-05, + "loss": 0.0812, + "num_input_tokens_seen": 557306864, + "step": 5747 + }, + { + "epoch": 0.561888932342589, + "loss": 0.1061754897236824, + "loss_ce": 0.0036974612157791853, + "loss_iou": 0.34765625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 557306864, + "step": 5747 + }, + { + "epoch": 0.5619867031677748, + "grad_norm": 29.93425545744919, + "learning_rate": 5e-05, + "loss": 0.0966, + "num_input_tokens_seen": 557403636, + "step": 5748 + }, + { + "epoch": 0.5619867031677748, + "loss": 0.10162628442049026, + "loss_ce": 0.004031069576740265, + "loss_iou": 0.291015625, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 557403636, + "step": 5748 + }, + { + "epoch": 0.5620844739929605, + "grad_norm": 15.259775029464711, + "learning_rate": 5e-05, + "loss": 0.1245, + "num_input_tokens_seen": 557500456, + "step": 5749 + }, + { + "epoch": 0.5620844739929605, + "loss": 0.17676547169685364, + "loss_ce": 0.0037079118192195892, + "loss_iou": 0.26953125, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 557500456, + "step": 5749 + }, + { + "epoch": 0.5621822448181463, + "grad_norm": 6.579414822381534, + "learning_rate": 5e-05, + "loss": 0.0858, + "num_input_tokens_seen": 557597968, + "step": 5750 + }, + { + "epoch": 0.5621822448181463, + "eval_seeclick_CIoU": 0.5142585039138794, + "eval_seeclick_GIoU": 0.5224614441394806, + "eval_seeclick_IoU": 0.5529481768608093, + "eval_seeclick_MAE_all": 0.06788626499474049, + "eval_seeclick_MAE_h": 0.041896335780620575, + "eval_seeclick_MAE_w": 0.08243530429899693, + "eval_seeclick_MAE_x": 0.1078028678894043, + "eval_seeclick_MAE_y": 0.0394105389714241, + "eval_seeclick_NUM_probability": 0.9999964237213135, + "eval_seeclick_inside_bbox": 0.8451704680919647, + "eval_seeclick_loss": 0.26662060618400574, + "eval_seeclick_loss_ce": 0.009822700172662735, + "eval_seeclick_loss_iou": 0.4193115234375, + "eval_seeclick_loss_num": 0.051788330078125, + "eval_seeclick_loss_xval": 0.25897216796875, + "eval_seeclick_runtime": 78.8402, + "eval_seeclick_samples_per_second": 0.545, + "eval_seeclick_steps_per_second": 0.025, + "num_input_tokens_seen": 557597968, + "step": 5750 + }, + { + "epoch": 0.5621822448181463, + "eval_icons_CIoU": 0.6752946972846985, + "eval_icons_GIoU": 0.6661202609539032, + "eval_icons_IoU": 0.7095775902271271, + "eval_icons_MAE_all": 0.0667756125330925, + "eval_icons_MAE_h": 0.07602916285395622, + "eval_icons_MAE_w": 0.05881456285715103, + "eval_icons_MAE_x": 0.060152096673846245, + "eval_icons_MAE_y": 0.07210664078593254, + "eval_icons_NUM_probability": 0.9999968111515045, + "eval_icons_inside_bbox": 0.7951388955116272, + "eval_icons_loss": 0.19180230796337128, + "eval_icons_loss_ce": 4.705400442617247e-06, + "eval_icons_loss_iou": 0.4066162109375, + "eval_icons_loss_num": 0.03853034973144531, + "eval_icons_loss_xval": 0.192779541015625, + "eval_icons_runtime": 86.0277, + "eval_icons_samples_per_second": 0.581, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 557597968, + "step": 5750 + }, + { + "epoch": 0.5621822448181463, + "eval_screenspot_CIoU": 0.2603745311498642, + "eval_screenspot_GIoU": 0.23544191817442575, + "eval_screenspot_IoU": 0.35999729235967, + "eval_screenspot_MAE_all": 0.18018304308255514, + "eval_screenspot_MAE_h": 0.13660845905542374, + "eval_screenspot_MAE_w": 0.23425386349360147, + "eval_screenspot_MAE_x": 0.21177029609680176, + "eval_screenspot_MAE_y": 0.1380995586514473, + "eval_screenspot_NUM_probability": 0.999977707862854, + "eval_screenspot_inside_bbox": 0.5645833412806193, + "eval_screenspot_loss": 0.6273177266120911, + "eval_screenspot_loss_ce": 0.025817299882570904, + "eval_screenspot_loss_iou": 0.3474934895833333, + "eval_screenspot_loss_num": 0.12006632486979167, + "eval_screenspot_loss_xval": 0.599853515625, + "eval_screenspot_runtime": 162.5489, + "eval_screenspot_samples_per_second": 0.548, + "eval_screenspot_steps_per_second": 0.018, + "num_input_tokens_seen": 557597968, + "step": 5750 + }, + { + "epoch": 0.5621822448181463, + "eval_compot_CIoU": 0.45559075474739075, + "eval_compot_GIoU": 0.4448898583650589, + "eval_compot_IoU": 0.5193458795547485, + "eval_compot_MAE_all": 0.0971498042345047, + "eval_compot_MAE_h": 0.09437883272767067, + "eval_compot_MAE_w": 0.09886115789413452, + "eval_compot_MAE_x": 0.09928906336426735, + "eval_compot_MAE_y": 0.09607017785310745, + "eval_compot_NUM_probability": 0.9999521970748901, + "eval_compot_inside_bbox": 0.7204861044883728, + "eval_compot_loss": 0.31352871656417847, + "eval_compot_loss_ce": 0.02090525906533003, + "eval_compot_loss_iou": 0.42864990234375, + "eval_compot_loss_num": 0.050350189208984375, + "eval_compot_loss_xval": 0.2516632080078125, + "eval_compot_runtime": 88.6616, + "eval_compot_samples_per_second": 0.564, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 557597968, + "step": 5750 + }, + { + "epoch": 0.5621822448181463, + "loss": 0.22559820115566254, + "loss_ce": 0.019451960921287537, + "loss_iou": 0.453125, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 557597968, + "step": 5750 + }, + { + "epoch": 0.5622800156433321, + "grad_norm": 4.626062368226178, + "learning_rate": 5e-05, + "loss": 0.0533, + "num_input_tokens_seen": 557693832, + "step": 5751 + }, + { + "epoch": 0.5622800156433321, + "loss": 0.05321813374757767, + "loss_ce": 0.003009091131389141, + "loss_iou": 0.365234375, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 557693832, + "step": 5751 + }, + { + "epoch": 0.5623777864685178, + "grad_norm": 12.448993379629394, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 557791536, + "step": 5752 + }, + { + "epoch": 0.5623777864685178, + "loss": 0.08122356235980988, + "loss_ce": 0.0075236172415316105, + "loss_iou": 0.353515625, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 557791536, + "step": 5752 + }, + { + "epoch": 0.5624755572937036, + "grad_norm": 15.973132115425301, + "learning_rate": 5e-05, + "loss": 0.0979, + "num_input_tokens_seen": 557888528, + "step": 5753 + }, + { + "epoch": 0.5624755572937036, + "loss": 0.08999935537576675, + "loss_ce": 0.012667816132307053, + "loss_iou": 0.33984375, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 557888528, + "step": 5753 + }, + { + "epoch": 0.5625733281188893, + "grad_norm": 4.562871941261027, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 557984552, + "step": 5754 + }, + { + "epoch": 0.5625733281188893, + "loss": 0.08568723499774933, + "loss_ce": 0.0049987598322331905, + "loss_iou": 0.31640625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 557984552, + "step": 5754 + }, + { + "epoch": 0.5626710989440751, + "grad_norm": 3.6620532553308127, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 558081796, + "step": 5755 + }, + { + "epoch": 0.5626710989440751, + "loss": 0.07445482164621353, + "loss_ce": 0.006797350011765957, + "loss_iou": 0.353515625, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 558081796, + "step": 5755 + }, + { + "epoch": 0.5627688697692609, + "grad_norm": 6.924399610850723, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 558178380, + "step": 5756 + }, + { + "epoch": 0.5627688697692609, + "loss": 0.08545921742916107, + "loss_ce": 0.0032143464777618647, + "loss_iou": 0.2890625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 558178380, + "step": 5756 + }, + { + "epoch": 0.5628666405944466, + "grad_norm": 7.190647921231458, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 558274960, + "step": 5757 + }, + { + "epoch": 0.5628666405944466, + "loss": 0.07531082630157471, + "loss_ce": 0.006234282627701759, + "loss_iou": 0.328125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 558274960, + "step": 5757 + }, + { + "epoch": 0.5629644114196324, + "grad_norm": 5.112044276672704, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 558372240, + "step": 5758 + }, + { + "epoch": 0.5629644114196324, + "loss": 0.11800210177898407, + "loss_ce": 0.0044843414798378944, + "loss_iou": 0.2392578125, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 558372240, + "step": 5758 + }, + { + "epoch": 0.5630621822448182, + "grad_norm": 2.8996713279598127, + "learning_rate": 5e-05, + "loss": 0.0655, + "num_input_tokens_seen": 558469832, + "step": 5759 + }, + { + "epoch": 0.5630621822448182, + "loss": 0.05594710633158684, + "loss_ce": 0.003807825967669487, + "loss_iou": 0.298828125, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 558469832, + "step": 5759 + }, + { + "epoch": 0.5631599530700039, + "grad_norm": 13.652275088114427, + "learning_rate": 5e-05, + "loss": 0.0945, + "num_input_tokens_seen": 558567024, + "step": 5760 + }, + { + "epoch": 0.5631599530700039, + "loss": 0.0698123574256897, + "loss_ce": 0.004016464576125145, + "loss_iou": 0.291015625, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 558567024, + "step": 5760 + }, + { + "epoch": 0.5632577238951897, + "grad_norm": 6.313378052874654, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 558664832, + "step": 5761 + }, + { + "epoch": 0.5632577238951897, + "loss": 0.07278096675872803, + "loss_ce": 0.0044749947264790535, + "loss_iou": 0.30078125, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 558664832, + "step": 5761 + }, + { + "epoch": 0.5633554947203755, + "grad_norm": 11.915577586610215, + "learning_rate": 5e-05, + "loss": 0.0469, + "num_input_tokens_seen": 558760840, + "step": 5762 + }, + { + "epoch": 0.5633554947203755, + "loss": 0.06696947664022446, + "loss_ce": 0.002371395006775856, + "loss_iou": 0.28125, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 558760840, + "step": 5762 + }, + { + "epoch": 0.5634532655455612, + "grad_norm": 4.922350681940632, + "learning_rate": 5e-05, + "loss": 0.0619, + "num_input_tokens_seen": 558858392, + "step": 5763 + }, + { + "epoch": 0.5634532655455612, + "loss": 0.06223051995038986, + "loss_ce": 0.0034040743485093117, + "loss_iou": 0.2734375, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 558858392, + "step": 5763 + }, + { + "epoch": 0.563551036370747, + "grad_norm": 5.028859706743291, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 558955440, + "step": 5764 + }, + { + "epoch": 0.563551036370747, + "loss": 0.0834750235080719, + "loss_ce": 0.005914597772061825, + "loss_iou": 0.181640625, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 558955440, + "step": 5764 + }, + { + "epoch": 0.5636488071959327, + "grad_norm": 10.576999129670769, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 559052252, + "step": 5765 + }, + { + "epoch": 0.5636488071959327, + "loss": 0.11190760135650635, + "loss_ce": 0.008010504767298698, + "loss_iou": 0.296875, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 559052252, + "step": 5765 + }, + { + "epoch": 0.5637465780211185, + "grad_norm": 15.071263181945739, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 559148876, + "step": 5766 + }, + { + "epoch": 0.5637465780211185, + "loss": 0.0630464106798172, + "loss_ce": 0.0025147912092506886, + "loss_iou": 0.251953125, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 559148876, + "step": 5766 + }, + { + "epoch": 0.5638443488463043, + "grad_norm": 11.590991433612306, + "learning_rate": 5e-05, + "loss": 0.0854, + "num_input_tokens_seen": 559245360, + "step": 5767 + }, + { + "epoch": 0.5638443488463043, + "loss": 0.07563803344964981, + "loss_ce": 0.0014345470117405057, + "loss_iou": 0.30078125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 559245360, + "step": 5767 + }, + { + "epoch": 0.56394211967149, + "grad_norm": 6.882013423490929, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 559342128, + "step": 5768 + }, + { + "epoch": 0.56394211967149, + "loss": 0.037663258612155914, + "loss_ce": 0.00422362145036459, + "loss_iou": 0.349609375, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 559342128, + "step": 5768 + }, + { + "epoch": 0.5640398904966758, + "grad_norm": 4.7947345302510564, + "learning_rate": 5e-05, + "loss": 0.0834, + "num_input_tokens_seen": 559440056, + "step": 5769 + }, + { + "epoch": 0.5640398904966758, + "loss": 0.11263275891542435, + "loss_ce": 0.03646088391542435, + "loss_iou": 0.30078125, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 559440056, + "step": 5769 + }, + { + "epoch": 0.5641376613218616, + "grad_norm": 8.770621655301296, + "learning_rate": 5e-05, + "loss": 0.083, + "num_input_tokens_seen": 559536320, + "step": 5770 + }, + { + "epoch": 0.5641376613218616, + "loss": 0.08053985238075256, + "loss_ce": 0.005558160599321127, + "loss_iou": 0.275390625, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 559536320, + "step": 5770 + }, + { + "epoch": 0.5642354321470473, + "grad_norm": 8.121814137856067, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 559632896, + "step": 5771 + }, + { + "epoch": 0.5642354321470473, + "loss": 0.056854162365198135, + "loss_ce": 0.004760659299790859, + "loss_iou": 0.2265625, + "loss_num": 0.01043701171875, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 559632896, + "step": 5771 + }, + { + "epoch": 0.5643332029722331, + "grad_norm": 3.53953306793839, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 559729924, + "step": 5772 + }, + { + "epoch": 0.5643332029722331, + "loss": 0.06189803406596184, + "loss_ce": 0.004690926056355238, + "loss_iou": 0.26171875, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 559729924, + "step": 5772 + }, + { + "epoch": 0.5644309737974188, + "grad_norm": 10.23499761069106, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 559827660, + "step": 5773 + }, + { + "epoch": 0.5644309737974188, + "loss": 0.08445225656032562, + "loss_ce": 0.004847155883908272, + "loss_iou": 0.375, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 559827660, + "step": 5773 + }, + { + "epoch": 0.5645287446226046, + "grad_norm": 11.62114905397806, + "learning_rate": 5e-05, + "loss": 0.0681, + "num_input_tokens_seen": 559925144, + "step": 5774 + }, + { + "epoch": 0.5645287446226046, + "loss": 0.07771219313144684, + "loss_ce": 0.005564821884036064, + "loss_iou": 0.26953125, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 559925144, + "step": 5774 + }, + { + "epoch": 0.5646265154477904, + "grad_norm": 11.44090094198165, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 560022356, + "step": 5775 + }, + { + "epoch": 0.5646265154477904, + "loss": 0.10465137660503387, + "loss_ce": 0.005179330240935087, + "loss_iou": 0.33203125, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 560022356, + "step": 5775 + }, + { + "epoch": 0.5647242862729761, + "grad_norm": 6.703815388267641, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 560119316, + "step": 5776 + }, + { + "epoch": 0.5647242862729761, + "loss": 0.06626832485198975, + "loss_ce": 0.002608658280223608, + "loss_iou": 0.298828125, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 560119316, + "step": 5776 + }, + { + "epoch": 0.5648220570981619, + "grad_norm": 4.362079182623384, + "learning_rate": 5e-05, + "loss": 0.1022, + "num_input_tokens_seen": 560216368, + "step": 5777 + }, + { + "epoch": 0.5648220570981619, + "loss": 0.1131865382194519, + "loss_ce": 0.006619147956371307, + "loss_iou": 0.28515625, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 560216368, + "step": 5777 + }, + { + "epoch": 0.5649198279233477, + "grad_norm": 13.597129527128708, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 560313768, + "step": 5778 + }, + { + "epoch": 0.5649198279233477, + "loss": 0.10593844205141068, + "loss_ce": 0.004665861837565899, + "loss_iou": 0.283203125, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 560313768, + "step": 5778 + }, + { + "epoch": 0.5650175987485334, + "grad_norm": 10.391302854361907, + "learning_rate": 5e-05, + "loss": 0.1279, + "num_input_tokens_seen": 560409388, + "step": 5779 + }, + { + "epoch": 0.5650175987485334, + "loss": 0.18261554837226868, + "loss_ce": 0.006498593837022781, + "loss_iou": 0.2890625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 560409388, + "step": 5779 + }, + { + "epoch": 0.5651153695737192, + "grad_norm": 9.253177574908579, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 560506276, + "step": 5780 + }, + { + "epoch": 0.5651153695737192, + "loss": 0.08161179721355438, + "loss_ce": 0.006874243728816509, + "loss_iou": 0.31640625, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 560506276, + "step": 5780 + }, + { + "epoch": 0.5652131403989049, + "grad_norm": 5.82049642462608, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 560603584, + "step": 5781 + }, + { + "epoch": 0.5652131403989049, + "loss": 0.06333347409963608, + "loss_ce": 0.005228008143603802, + "loss_iou": 0.2412109375, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 560603584, + "step": 5781 + }, + { + "epoch": 0.5653109112240907, + "grad_norm": 31.54870187444142, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 560700832, + "step": 5782 + }, + { + "epoch": 0.5653109112240907, + "loss": 0.044604986906051636, + "loss_ce": 0.0038640229031443596, + "loss_iou": 0.32421875, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 560700832, + "step": 5782 + }, + { + "epoch": 0.5654086820492765, + "grad_norm": 6.5915771504620135, + "learning_rate": 5e-05, + "loss": 0.1228, + "num_input_tokens_seen": 560796448, + "step": 5783 + }, + { + "epoch": 0.5654086820492765, + "loss": 0.14282754063606262, + "loss_ce": 0.009038479067385197, + "loss_iou": 0.2353515625, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 560796448, + "step": 5783 + }, + { + "epoch": 0.5655064528744622, + "grad_norm": 11.826814398401668, + "learning_rate": 5e-05, + "loss": 0.0695, + "num_input_tokens_seen": 560894056, + "step": 5784 + }, + { + "epoch": 0.5655064528744622, + "loss": 0.05632762610912323, + "loss_ce": 0.006141467019915581, + "loss_iou": 0.34765625, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 560894056, + "step": 5784 + }, + { + "epoch": 0.565604223699648, + "grad_norm": 12.005396326534322, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 560990772, + "step": 5785 + }, + { + "epoch": 0.565604223699648, + "loss": 0.08774147927761078, + "loss_ce": 0.002971281763166189, + "loss_iou": 0.328125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 560990772, + "step": 5785 + }, + { + "epoch": 0.5657019945248338, + "grad_norm": 12.418871335161214, + "learning_rate": 5e-05, + "loss": 0.1121, + "num_input_tokens_seen": 561088076, + "step": 5786 + }, + { + "epoch": 0.5657019945248338, + "loss": 0.11957675218582153, + "loss_ce": 0.007088949903845787, + "loss_iou": 0.345703125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 561088076, + "step": 5786 + }, + { + "epoch": 0.5657997653500195, + "grad_norm": 5.325564458670678, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 561184724, + "step": 5787 + }, + { + "epoch": 0.5657997653500195, + "loss": 0.08084450662136078, + "loss_ce": 0.00769386999309063, + "loss_iou": 0.27734375, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 561184724, + "step": 5787 + }, + { + "epoch": 0.5658975361752053, + "grad_norm": 4.1695191976378085, + "learning_rate": 5e-05, + "loss": 0.0525, + "num_input_tokens_seen": 561281800, + "step": 5788 + }, + { + "epoch": 0.5658975361752053, + "loss": 0.06713615357875824, + "loss_ce": 0.006314622238278389, + "loss_iou": 0.326171875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 561281800, + "step": 5788 + }, + { + "epoch": 0.5659953070003911, + "grad_norm": 11.033305161658374, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 561379140, + "step": 5789 + }, + { + "epoch": 0.5659953070003911, + "loss": 0.07883112132549286, + "loss_ce": 0.004314830061048269, + "loss_iou": 0.271484375, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 561379140, + "step": 5789 + }, + { + "epoch": 0.5660930778255768, + "grad_norm": 4.004732268999122, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 561475760, + "step": 5790 + }, + { + "epoch": 0.5660930778255768, + "loss": 0.04866491258144379, + "loss_ce": 0.005604612175375223, + "loss_iou": 0.306640625, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 561475760, + "step": 5790 + }, + { + "epoch": 0.5661908486507626, + "grad_norm": 12.245831709301685, + "learning_rate": 5e-05, + "loss": 0.0871, + "num_input_tokens_seen": 561573972, + "step": 5791 + }, + { + "epoch": 0.5661908486507626, + "loss": 0.0461144745349884, + "loss_ce": 0.005098852328956127, + "loss_iou": 0.416015625, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 561573972, + "step": 5791 + }, + { + "epoch": 0.5662886194759483, + "grad_norm": 45.24045036471791, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 561671088, + "step": 5792 + }, + { + "epoch": 0.5662886194759483, + "loss": 0.05267101526260376, + "loss_ce": 0.004529532510787249, + "loss_iou": 0.396484375, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 561671088, + "step": 5792 + }, + { + "epoch": 0.5663863903011341, + "grad_norm": 17.17007276978216, + "learning_rate": 5e-05, + "loss": 0.0947, + "num_input_tokens_seen": 561767684, + "step": 5793 + }, + { + "epoch": 0.5663863903011341, + "loss": 0.07611197233200073, + "loss_ce": 0.00391501234844327, + "loss_iou": 0.30078125, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 561767684, + "step": 5793 + }, + { + "epoch": 0.5664841611263199, + "grad_norm": 3.94647527366423, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 561863980, + "step": 5794 + }, + { + "epoch": 0.5664841611263199, + "loss": 0.08298055827617645, + "loss_ce": 0.0053095025941729546, + "loss_iou": 0.2490234375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 561863980, + "step": 5794 + }, + { + "epoch": 0.5665819319515056, + "grad_norm": 13.194248830451421, + "learning_rate": 5e-05, + "loss": 0.1305, + "num_input_tokens_seen": 561961288, + "step": 5795 + }, + { + "epoch": 0.5665819319515056, + "loss": 0.14707782864570618, + "loss_ce": 0.013258259743452072, + "loss_iou": 0.224609375, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 561961288, + "step": 5795 + }, + { + "epoch": 0.5666797027766914, + "grad_norm": 16.634481390815818, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 562058544, + "step": 5796 + }, + { + "epoch": 0.5666797027766914, + "loss": 0.08387631177902222, + "loss_ce": 0.005064667202532291, + "loss_iou": 0.322265625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 562058544, + "step": 5796 + }, + { + "epoch": 0.5667774736018772, + "grad_norm": 30.433994535085922, + "learning_rate": 5e-05, + "loss": 0.1058, + "num_input_tokens_seen": 562155088, + "step": 5797 + }, + { + "epoch": 0.5667774736018772, + "loss": 0.17453715205192566, + "loss_ce": 0.00436350516974926, + "loss_iou": 0.1865234375, + "loss_num": 0.0341796875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 562155088, + "step": 5797 + }, + { + "epoch": 0.5668752444270629, + "grad_norm": 19.43384675442165, + "learning_rate": 5e-05, + "loss": 0.0527, + "num_input_tokens_seen": 562252496, + "step": 5798 + }, + { + "epoch": 0.5668752444270629, + "loss": 0.07182684540748596, + "loss_ce": 0.005725770723074675, + "loss_iou": 0.3125, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 562252496, + "step": 5798 + }, + { + "epoch": 0.5669730152522487, + "grad_norm": 7.689694148679254, + "learning_rate": 5e-05, + "loss": 0.0621, + "num_input_tokens_seen": 562348652, + "step": 5799 + }, + { + "epoch": 0.5669730152522487, + "loss": 0.05134668946266174, + "loss_ce": 0.0014618942514061928, + "loss_iou": 0.2890625, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 562348652, + "step": 5799 + }, + { + "epoch": 0.5670707860774344, + "grad_norm": 2.842316212731154, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 562445972, + "step": 5800 + }, + { + "epoch": 0.5670707860774344, + "loss": 0.044848568737506866, + "loss_ce": 0.005450377240777016, + "loss_iou": 0.328125, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 562445972, + "step": 5800 + }, + { + "epoch": 0.5671685569026202, + "grad_norm": 10.486665393284822, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 562544376, + "step": 5801 + }, + { + "epoch": 0.5671685569026202, + "loss": 0.07961012423038483, + "loss_ce": 0.0035908410791307688, + "loss_iou": 0.361328125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 562544376, + "step": 5801 + }, + { + "epoch": 0.567266327727806, + "grad_norm": 28.782445128665735, + "learning_rate": 5e-05, + "loss": 0.1151, + "num_input_tokens_seen": 562642024, + "step": 5802 + }, + { + "epoch": 0.567266327727806, + "loss": 0.12762048840522766, + "loss_ce": 0.007320197764784098, + "loss_iou": 0.34375, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 562642024, + "step": 5802 + }, + { + "epoch": 0.5673640985529917, + "grad_norm": 10.831900440604938, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 562739560, + "step": 5803 + }, + { + "epoch": 0.5673640985529917, + "loss": 0.07679665833711624, + "loss_ce": 0.0047751739621162415, + "loss_iou": 0.234375, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 562739560, + "step": 5803 + }, + { + "epoch": 0.5674618693781776, + "grad_norm": 7.053888824387127, + "learning_rate": 5e-05, + "loss": 0.1075, + "num_input_tokens_seen": 562836412, + "step": 5804 + }, + { + "epoch": 0.5674618693781776, + "loss": 0.07417960464954376, + "loss_ce": 0.003973911050707102, + "loss_iou": 0.3515625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 562836412, + "step": 5804 + }, + { + "epoch": 0.5675596402033634, + "grad_norm": 7.874606182983498, + "learning_rate": 5e-05, + "loss": 0.1124, + "num_input_tokens_seen": 562932772, + "step": 5805 + }, + { + "epoch": 0.5675596402033634, + "loss": 0.14758263528347015, + "loss_ce": 0.004195796325802803, + "loss_iou": 0.33203125, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 562932772, + "step": 5805 + }, + { + "epoch": 0.567657411028549, + "grad_norm": 10.79227457908471, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 563030008, + "step": 5806 + }, + { + "epoch": 0.567657411028549, + "loss": 0.09003740549087524, + "loss_ce": 0.004649214446544647, + "loss_iou": 0.318359375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 563030008, + "step": 5806 + }, + { + "epoch": 0.5677551818537349, + "grad_norm": 10.657210051443297, + "learning_rate": 5e-05, + "loss": 0.1031, + "num_input_tokens_seen": 563127064, + "step": 5807 + }, + { + "epoch": 0.5677551818537349, + "loss": 0.09069293737411499, + "loss_ce": 0.0030769729055464268, + "loss_iou": 0.353515625, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 563127064, + "step": 5807 + }, + { + "epoch": 0.5678529526789207, + "grad_norm": 4.1625970931519305, + "learning_rate": 5e-05, + "loss": 0.0758, + "num_input_tokens_seen": 563223208, + "step": 5808 + }, + { + "epoch": 0.5678529526789207, + "loss": 0.06987902522087097, + "loss_ce": 0.004617180209606886, + "loss_iou": 0.34375, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 563223208, + "step": 5808 + }, + { + "epoch": 0.5679507235041064, + "grad_norm": 5.818941622001936, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 563320076, + "step": 5809 + }, + { + "epoch": 0.5679507235041064, + "loss": 0.1147497147321701, + "loss_ce": 0.005687524098902941, + "loss_iou": 0.26953125, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 563320076, + "step": 5809 + }, + { + "epoch": 0.5680484943292922, + "grad_norm": 9.432467811777327, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 563417648, + "step": 5810 + }, + { + "epoch": 0.5680484943292922, + "loss": 0.07367821037769318, + "loss_ce": 0.004922103136777878, + "loss_iou": 0.32421875, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 563417648, + "step": 5810 + }, + { + "epoch": 0.5681462651544779, + "grad_norm": 6.461129183666913, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 563514752, + "step": 5811 + }, + { + "epoch": 0.5681462651544779, + "loss": 0.06425800919532776, + "loss_ce": 0.005420120432972908, + "loss_iou": 0.251953125, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 563514752, + "step": 5811 + }, + { + "epoch": 0.5682440359796637, + "grad_norm": 12.082798863526355, + "learning_rate": 5e-05, + "loss": 0.1105, + "num_input_tokens_seen": 563611112, + "step": 5812 + }, + { + "epoch": 0.5682440359796637, + "loss": 0.13396134972572327, + "loss_ce": 0.006947190035134554, + "loss_iou": 0.26171875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 563611112, + "step": 5812 + }, + { + "epoch": 0.5683418068048495, + "grad_norm": 4.408862052078089, + "learning_rate": 5e-05, + "loss": 0.0834, + "num_input_tokens_seen": 563708156, + "step": 5813 + }, + { + "epoch": 0.5683418068048495, + "loss": 0.08757676184177399, + "loss_ce": 0.004095921292901039, + "loss_iou": 0.306640625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 563708156, + "step": 5813 + }, + { + "epoch": 0.5684395776300352, + "grad_norm": 4.46470932948371, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 563806008, + "step": 5814 + }, + { + "epoch": 0.5684395776300352, + "loss": 0.04372221231460571, + "loss_ce": 0.004438459407538176, + "loss_iou": 0.314453125, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 563806008, + "step": 5814 + }, + { + "epoch": 0.568537348455221, + "grad_norm": 3.600066117484207, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 563903088, + "step": 5815 + }, + { + "epoch": 0.568537348455221, + "loss": 0.05956098437309265, + "loss_ce": 0.002805917989462614, + "loss_iou": 0.376953125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 563903088, + "step": 5815 + }, + { + "epoch": 0.5686351192804068, + "grad_norm": 4.815191883235732, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 563999320, + "step": 5816 + }, + { + "epoch": 0.5686351192804068, + "loss": 0.08953070640563965, + "loss_ce": 0.005744693335145712, + "loss_iou": 0.2490234375, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 563999320, + "step": 5816 + }, + { + "epoch": 0.5687328901055925, + "grad_norm": 2.8173457817856513, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 564096448, + "step": 5817 + }, + { + "epoch": 0.5687328901055925, + "loss": 0.07994775474071503, + "loss_ce": 0.0054390924051404, + "loss_iou": 0.306640625, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 564096448, + "step": 5817 + }, + { + "epoch": 0.5688306609307783, + "grad_norm": 4.693262918869492, + "learning_rate": 5e-05, + "loss": 0.0551, + "num_input_tokens_seen": 564193252, + "step": 5818 + }, + { + "epoch": 0.5688306609307783, + "loss": 0.06875482946634293, + "loss_ce": 0.0048128776252269745, + "loss_iou": 0.359375, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 564193252, + "step": 5818 + }, + { + "epoch": 0.568928431755964, + "grad_norm": 21.44015503976979, + "learning_rate": 5e-05, + "loss": 0.0928, + "num_input_tokens_seen": 564288900, + "step": 5819 + }, + { + "epoch": 0.568928431755964, + "loss": 0.0782214105129242, + "loss_ce": 0.0031786910258233547, + "loss_iou": 0.2041015625, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 564288900, + "step": 5819 + }, + { + "epoch": 0.5690262025811498, + "grad_norm": 12.997778605586976, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 564384720, + "step": 5820 + }, + { + "epoch": 0.5690262025811498, + "loss": 0.07668246328830719, + "loss_ce": 0.0032266569323837757, + "loss_iou": 0.3203125, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 564384720, + "step": 5820 + }, + { + "epoch": 0.5691239734063356, + "grad_norm": 3.2799546008787592, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 564481504, + "step": 5821 + }, + { + "epoch": 0.5691239734063356, + "loss": 0.04456471651792526, + "loss_ce": 0.0068449899554252625, + "loss_iou": 0.2392578125, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 564481504, + "step": 5821 + }, + { + "epoch": 0.5692217442315213, + "grad_norm": 24.71256301302857, + "learning_rate": 5e-05, + "loss": 0.0916, + "num_input_tokens_seen": 564577912, + "step": 5822 + }, + { + "epoch": 0.5692217442315213, + "loss": 0.08037649095058441, + "loss_ce": 0.005562645383179188, + "loss_iou": 0.25390625, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 564577912, + "step": 5822 + }, + { + "epoch": 0.5693195150567071, + "grad_norm": 2.466723044882972, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 564674748, + "step": 5823 + }, + { + "epoch": 0.5693195150567071, + "loss": 0.05805940181016922, + "loss_ce": 0.0035855253227055073, + "loss_iou": 0.31640625, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 564674748, + "step": 5823 + }, + { + "epoch": 0.5694172858818929, + "grad_norm": 6.981830821855556, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 564771672, + "step": 5824 + }, + { + "epoch": 0.5694172858818929, + "loss": 0.1063183844089508, + "loss_ce": 0.005305204540491104, + "loss_iou": 0.3125, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 564771672, + "step": 5824 + }, + { + "epoch": 0.5695150567070786, + "grad_norm": 21.316808627612474, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 564869220, + "step": 5825 + }, + { + "epoch": 0.5695150567070786, + "loss": 0.05893588066101074, + "loss_ce": 0.0029056058265268803, + "loss_iou": 0.326171875, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 564869220, + "step": 5825 + }, + { + "epoch": 0.5696128275322644, + "grad_norm": 3.5677913464674713, + "learning_rate": 5e-05, + "loss": 0.0806, + "num_input_tokens_seen": 564967256, + "step": 5826 + }, + { + "epoch": 0.5696128275322644, + "loss": 0.07342532277107239, + "loss_ce": 0.0023651402443647385, + "loss_iou": 0.40234375, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 564967256, + "step": 5826 + }, + { + "epoch": 0.5697105983574501, + "grad_norm": 6.889739096410188, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 565063764, + "step": 5827 + }, + { + "epoch": 0.5697105983574501, + "loss": 0.08888016641139984, + "loss_ce": 0.005284888669848442, + "loss_iou": 0.330078125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 565063764, + "step": 5827 + }, + { + "epoch": 0.5698083691826359, + "grad_norm": 4.210003798729672, + "learning_rate": 5e-05, + "loss": 0.0643, + "num_input_tokens_seen": 565160176, + "step": 5828 + }, + { + "epoch": 0.5698083691826359, + "loss": 0.05832204222679138, + "loss_ce": 0.007037249859422445, + "loss_iou": 0.369140625, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 565160176, + "step": 5828 + }, + { + "epoch": 0.5699061400078217, + "grad_norm": 3.3629373183078095, + "learning_rate": 5e-05, + "loss": 0.0619, + "num_input_tokens_seen": 565257388, + "step": 5829 + }, + { + "epoch": 0.5699061400078217, + "loss": 0.060098253190517426, + "loss_ce": 0.0055938600562512875, + "loss_iou": 0.2578125, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 565257388, + "step": 5829 + }, + { + "epoch": 0.5700039108330074, + "grad_norm": 7.66351942621761, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 565353984, + "step": 5830 + }, + { + "epoch": 0.5700039108330074, + "loss": 0.07911619544029236, + "loss_ce": 0.006779998075217009, + "loss_iou": 0.328125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 565353984, + "step": 5830 + }, + { + "epoch": 0.5701016816581932, + "grad_norm": 3.7893089649800693, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 565450424, + "step": 5831 + }, + { + "epoch": 0.5701016816581932, + "loss": 0.06772544234991074, + "loss_ce": 0.004004737362265587, + "loss_iou": 0.3671875, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 565450424, + "step": 5831 + }, + { + "epoch": 0.570199452483379, + "grad_norm": 7.583810139727326, + "learning_rate": 5e-05, + "loss": 0.0995, + "num_input_tokens_seen": 565548180, + "step": 5832 + }, + { + "epoch": 0.570199452483379, + "loss": 0.11889895796775818, + "loss_ce": 0.005404084920883179, + "loss_iou": 0.265625, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 565548180, + "step": 5832 + }, + { + "epoch": 0.5702972233085647, + "grad_norm": 9.110046172263502, + "learning_rate": 5e-05, + "loss": 0.0954, + "num_input_tokens_seen": 565644832, + "step": 5833 + }, + { + "epoch": 0.5702972233085647, + "loss": 0.08304429799318314, + "loss_ce": 0.0027830679900944233, + "loss_iou": 0.27734375, + "loss_num": 0.01611328125, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 565644832, + "step": 5833 + }, + { + "epoch": 0.5703949941337505, + "grad_norm": 8.521010584641239, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 565741584, + "step": 5834 + }, + { + "epoch": 0.5703949941337505, + "loss": 0.08912961930036545, + "loss_ce": 0.005068955942988396, + "loss_iou": 0.375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 565741584, + "step": 5834 + }, + { + "epoch": 0.5704927649589363, + "grad_norm": 3.3021291501970076, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 565838440, + "step": 5835 + }, + { + "epoch": 0.5704927649589363, + "loss": 0.0790630355477333, + "loss_ce": 0.014236069284379482, + "loss_iou": 0.255859375, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 565838440, + "step": 5835 + }, + { + "epoch": 0.570590535784122, + "grad_norm": 6.071167024663304, + "learning_rate": 5e-05, + "loss": 0.0871, + "num_input_tokens_seen": 565935080, + "step": 5836 + }, + { + "epoch": 0.570590535784122, + "loss": 0.12130773067474365, + "loss_ce": 0.009292962029576302, + "loss_iou": 0.3359375, + "loss_num": 0.0224609375, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 565935080, + "step": 5836 + }, + { + "epoch": 0.5706883066093078, + "grad_norm": 3.8637180427520152, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 566031752, + "step": 5837 + }, + { + "epoch": 0.5706883066093078, + "loss": 0.08319903165102005, + "loss_ce": 0.004719261080026627, + "loss_iou": 0.240234375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 566031752, + "step": 5837 + }, + { + "epoch": 0.5707860774344935, + "grad_norm": 11.177501657909083, + "learning_rate": 5e-05, + "loss": 0.0959, + "num_input_tokens_seen": 566128356, + "step": 5838 + }, + { + "epoch": 0.5707860774344935, + "loss": 0.07288457453250885, + "loss_ce": 0.007409111596643925, + "loss_iou": 0.26171875, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 566128356, + "step": 5838 + }, + { + "epoch": 0.5708838482596793, + "grad_norm": 11.832993545350247, + "learning_rate": 5e-05, + "loss": 0.0485, + "num_input_tokens_seen": 566224672, + "step": 5839 + }, + { + "epoch": 0.5708838482596793, + "loss": 0.04195277392864227, + "loss_ce": 0.004686994943767786, + "loss_iou": 0.25, + "loss_num": 0.0074462890625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 566224672, + "step": 5839 + }, + { + "epoch": 0.5709816190848651, + "grad_norm": 16.06659100884007, + "learning_rate": 5e-05, + "loss": 0.1023, + "num_input_tokens_seen": 566321704, + "step": 5840 + }, + { + "epoch": 0.5709816190848651, + "loss": 0.13372057676315308, + "loss_ce": 0.011528189294040203, + "loss_iou": 0.357421875, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 566321704, + "step": 5840 + }, + { + "epoch": 0.5710793899100508, + "grad_norm": 11.00785085445619, + "learning_rate": 5e-05, + "loss": 0.0827, + "num_input_tokens_seen": 566418180, + "step": 5841 + }, + { + "epoch": 0.5710793899100508, + "loss": 0.09130154550075531, + "loss_ce": 0.00902615673840046, + "loss_iou": 0.2001953125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 566418180, + "step": 5841 + }, + { + "epoch": 0.5711771607352366, + "grad_norm": 5.309214692046311, + "learning_rate": 5e-05, + "loss": 0.0718, + "num_input_tokens_seen": 566514904, + "step": 5842 + }, + { + "epoch": 0.5711771607352366, + "loss": 0.06543370336294174, + "loss_ce": 0.006992541253566742, + "loss_iou": 0.328125, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 566514904, + "step": 5842 + }, + { + "epoch": 0.5712749315604224, + "grad_norm": 3.266260933128955, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 566611672, + "step": 5843 + }, + { + "epoch": 0.5712749315604224, + "loss": 0.0825410932302475, + "loss_ce": 0.009329422377049923, + "loss_iou": 0.33203125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 566611672, + "step": 5843 + }, + { + "epoch": 0.5713727023856081, + "grad_norm": 29.74961780417928, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 566708620, + "step": 5844 + }, + { + "epoch": 0.5713727023856081, + "loss": 0.07797110080718994, + "loss_ce": 0.00751745468005538, + "loss_iou": 0.208984375, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 566708620, + "step": 5844 + }, + { + "epoch": 0.5714704732107939, + "grad_norm": 10.702328715557373, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 566805848, + "step": 5845 + }, + { + "epoch": 0.5714704732107939, + "loss": 0.10410818457603455, + "loss_ce": 0.0052770040929317474, + "loss_iou": 0.25390625, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 566805848, + "step": 5845 + }, + { + "epoch": 0.5715682440359796, + "grad_norm": 7.48424721279604, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 566901900, + "step": 5846 + }, + { + "epoch": 0.5715682440359796, + "loss": 0.07468049228191376, + "loss_ce": 0.0038949698209762573, + "loss_iou": 0.12890625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 566901900, + "step": 5846 + }, + { + "epoch": 0.5716660148611654, + "grad_norm": 14.202985594093507, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 566999100, + "step": 5847 + }, + { + "epoch": 0.5716660148611654, + "loss": 0.06891459226608276, + "loss_ce": 0.00827616173774004, + "loss_iou": 0.369140625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 566999100, + "step": 5847 + }, + { + "epoch": 0.5717637856863512, + "grad_norm": 8.639412969350078, + "learning_rate": 5e-05, + "loss": 0.049, + "num_input_tokens_seen": 567096308, + "step": 5848 + }, + { + "epoch": 0.5717637856863512, + "loss": 0.03593707084655762, + "loss_ce": 0.0013759115245193243, + "loss_iou": 0.2734375, + "loss_num": 0.00689697265625, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 567096308, + "step": 5848 + }, + { + "epoch": 0.5718615565115369, + "grad_norm": 13.868317427526732, + "learning_rate": 5e-05, + "loss": 0.1186, + "num_input_tokens_seen": 567193228, + "step": 5849 + }, + { + "epoch": 0.5718615565115369, + "loss": 0.11394842714071274, + "loss_ce": 0.0021167579106986523, + "loss_iou": 0.25390625, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 567193228, + "step": 5849 + }, + { + "epoch": 0.5719593273367227, + "grad_norm": 11.12381522742187, + "learning_rate": 5e-05, + "loss": 0.1256, + "num_input_tokens_seen": 567289808, + "step": 5850 + }, + { + "epoch": 0.5719593273367227, + "loss": 0.11987175047397614, + "loss_ce": 0.003248831955716014, + "loss_iou": 0.375, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 567289808, + "step": 5850 + }, + { + "epoch": 0.5720570981619085, + "grad_norm": 3.5084417223696938, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 567387052, + "step": 5851 + }, + { + "epoch": 0.5720570981619085, + "loss": 0.06572550535202026, + "loss_ce": 0.004888712428510189, + "loss_iou": 0.283203125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 567387052, + "step": 5851 + }, + { + "epoch": 0.5721548689870942, + "grad_norm": 5.375518780195213, + "learning_rate": 5e-05, + "loss": 0.0994, + "num_input_tokens_seen": 567484384, + "step": 5852 + }, + { + "epoch": 0.5721548689870942, + "loss": 0.0898442417383194, + "loss_ce": 0.005783573724329472, + "loss_iou": 0.412109375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 567484384, + "step": 5852 + }, + { + "epoch": 0.57225263981228, + "grad_norm": 8.736010195248161, + "learning_rate": 5e-05, + "loss": 0.0701, + "num_input_tokens_seen": 567581380, + "step": 5853 + }, + { + "epoch": 0.57225263981228, + "loss": 0.07172664999961853, + "loss_ce": 0.006037556566298008, + "loss_iou": 0.2412109375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 567581380, + "step": 5853 + }, + { + "epoch": 0.5723504106374658, + "grad_norm": 5.070826165145606, + "learning_rate": 5e-05, + "loss": 0.0996, + "num_input_tokens_seen": 567678704, + "step": 5854 + }, + { + "epoch": 0.5723504106374658, + "loss": 0.07550220936536789, + "loss_ce": 0.0036180540919303894, + "loss_iou": 0.333984375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 567678704, + "step": 5854 + }, + { + "epoch": 0.5724481814626515, + "grad_norm": 6.6642110976274465, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 567775272, + "step": 5855 + }, + { + "epoch": 0.5724481814626515, + "loss": 0.0699806958436966, + "loss_ce": 0.004795149900019169, + "loss_iou": 0.263671875, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 567775272, + "step": 5855 + }, + { + "epoch": 0.5725459522878373, + "grad_norm": 9.83087996248281, + "learning_rate": 5e-05, + "loss": 0.1142, + "num_input_tokens_seen": 567872532, + "step": 5856 + }, + { + "epoch": 0.5725459522878373, + "loss": 0.15033668279647827, + "loss_ce": 0.01193946972489357, + "loss_iou": 0.380859375, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 567872532, + "step": 5856 + }, + { + "epoch": 0.572643723113023, + "grad_norm": 11.158717291078853, + "learning_rate": 5e-05, + "loss": 0.0992, + "num_input_tokens_seen": 567969404, + "step": 5857 + }, + { + "epoch": 0.572643723113023, + "loss": 0.12869639694690704, + "loss_ce": 0.002536735264584422, + "loss_iou": 0.337890625, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 567969404, + "step": 5857 + }, + { + "epoch": 0.5727414939382088, + "grad_norm": 2.6278096848333274, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 568066224, + "step": 5858 + }, + { + "epoch": 0.5727414939382088, + "loss": 0.06571393460035324, + "loss_ce": 0.009409005753695965, + "loss_iou": 0.34375, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 568066224, + "step": 5858 + }, + { + "epoch": 0.5728392647633946, + "grad_norm": 5.505479714938939, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 568163260, + "step": 5859 + }, + { + "epoch": 0.5728392647633946, + "loss": 0.0740143284201622, + "loss_ce": 0.004754683468490839, + "loss_iou": 0.328125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 568163260, + "step": 5859 + }, + { + "epoch": 0.5729370355885803, + "grad_norm": 6.9500362066543, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 568260064, + "step": 5860 + }, + { + "epoch": 0.5729370355885803, + "loss": 0.05589980632066727, + "loss_ce": 0.006674956064671278, + "loss_iou": 0.330078125, + "loss_num": 0.00982666015625, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 568260064, + "step": 5860 + }, + { + "epoch": 0.5730348064137661, + "grad_norm": 5.942293794988367, + "learning_rate": 5e-05, + "loss": 0.0838, + "num_input_tokens_seen": 568355960, + "step": 5861 + }, + { + "epoch": 0.5730348064137661, + "loss": 0.08554087579250336, + "loss_ce": 0.007316690869629383, + "loss_iou": 0.30078125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 568355960, + "step": 5861 + }, + { + "epoch": 0.573132577238952, + "grad_norm": 8.741852801857329, + "learning_rate": 5e-05, + "loss": 0.0994, + "num_input_tokens_seen": 568451076, + "step": 5862 + }, + { + "epoch": 0.573132577238952, + "loss": 0.061674606055021286, + "loss_ce": 0.005125534720718861, + "loss_iou": 0.29296875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 568451076, + "step": 5862 + }, + { + "epoch": 0.5732303480641376, + "grad_norm": 7.914894701360605, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 568548672, + "step": 5863 + }, + { + "epoch": 0.5732303480641376, + "loss": 0.05558505654335022, + "loss_ce": 0.001996190520003438, + "loss_iou": 0.271484375, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 568548672, + "step": 5863 + }, + { + "epoch": 0.5733281188893234, + "grad_norm": 3.3503660818017043, + "learning_rate": 5e-05, + "loss": 0.0632, + "num_input_tokens_seen": 568644760, + "step": 5864 + }, + { + "epoch": 0.5733281188893234, + "loss": 0.04447105899453163, + "loss_ce": 0.003844534745439887, + "loss_iou": 0.25390625, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 568644760, + "step": 5864 + }, + { + "epoch": 0.5734258897145091, + "grad_norm": 7.014524456437983, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 568742208, + "step": 5865 + }, + { + "epoch": 0.5734258897145091, + "loss": 0.072858527302742, + "loss_ce": 0.002279001986607909, + "loss_iou": 0.318359375, + "loss_num": 0.01409912109375, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 568742208, + "step": 5865 + }, + { + "epoch": 0.573523660539695, + "grad_norm": 5.3928328584829, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 568839236, + "step": 5866 + }, + { + "epoch": 0.573523660539695, + "loss": 0.10139875113964081, + "loss_ce": 0.00885420385748148, + "loss_iou": 0.326171875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 568839236, + "step": 5866 + }, + { + "epoch": 0.5736214313648808, + "grad_norm": 4.260065775664883, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 568935728, + "step": 5867 + }, + { + "epoch": 0.5736214313648808, + "loss": 0.09537672996520996, + "loss_ce": 0.008447403088212013, + "loss_iou": 0.314453125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 568935728, + "step": 5867 + }, + { + "epoch": 0.5737192021900664, + "grad_norm": 17.36157587652996, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 569032324, + "step": 5868 + }, + { + "epoch": 0.5737192021900664, + "loss": 0.0955844447016716, + "loss_ce": 0.004420810844749212, + "loss_iou": 0.310546875, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 569032324, + "step": 5868 + }, + { + "epoch": 0.5738169730152523, + "grad_norm": 4.500471597142611, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 569128936, + "step": 5869 + }, + { + "epoch": 0.5738169730152523, + "loss": 0.06474248319864273, + "loss_ce": 0.0051263924688100815, + "loss_iou": 0.328125, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 569128936, + "step": 5869 + }, + { + "epoch": 0.5739147438404381, + "grad_norm": 11.79415796399535, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 569225984, + "step": 5870 + }, + { + "epoch": 0.5739147438404381, + "loss": 0.1021113395690918, + "loss_ce": 0.006118297576904297, + "loss_iou": 0.322265625, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 569225984, + "step": 5870 + }, + { + "epoch": 0.5740125146656238, + "grad_norm": 20.611767647987694, + "learning_rate": 5e-05, + "loss": 0.1045, + "num_input_tokens_seen": 569322420, + "step": 5871 + }, + { + "epoch": 0.5740125146656238, + "loss": 0.07069777697324753, + "loss_ce": 0.0057868920266628265, + "loss_iou": 0.2392578125, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 569322420, + "step": 5871 + }, + { + "epoch": 0.5741102854908096, + "grad_norm": 9.688067222406923, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 569418852, + "step": 5872 + }, + { + "epoch": 0.5741102854908096, + "loss": 0.07323164492845535, + "loss_ce": 0.0064286659471690655, + "loss_iou": 0.341796875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 569418852, + "step": 5872 + }, + { + "epoch": 0.5742080563159953, + "grad_norm": 10.558063653699213, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 569515344, + "step": 5873 + }, + { + "epoch": 0.5742080563159953, + "loss": 0.04996725916862488, + "loss_ce": 0.0032295857090502977, + "loss_iou": 0.3125, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 569515344, + "step": 5873 + }, + { + "epoch": 0.5743058271411811, + "grad_norm": 6.5747940788780905, + "learning_rate": 5e-05, + "loss": 0.0591, + "num_input_tokens_seen": 569612168, + "step": 5874 + }, + { + "epoch": 0.5743058271411811, + "loss": 0.0622672438621521, + "loss_ce": 0.003536168485879898, + "loss_iou": 0.314453125, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 569612168, + "step": 5874 + }, + { + "epoch": 0.5744035979663669, + "grad_norm": 5.902326862279468, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 569710024, + "step": 5875 + }, + { + "epoch": 0.5744035979663669, + "loss": 0.07146398723125458, + "loss_ce": 0.005332401487976313, + "loss_iou": 0.310546875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 569710024, + "step": 5875 + }, + { + "epoch": 0.5745013687915526, + "grad_norm": 6.7851345061566875, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 569806976, + "step": 5876 + }, + { + "epoch": 0.5745013687915526, + "loss": 0.06099073216319084, + "loss_ce": 0.008378427475690842, + "loss_iou": 0.349609375, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 569806976, + "step": 5876 + }, + { + "epoch": 0.5745991396167384, + "grad_norm": 4.476624751989623, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 569902696, + "step": 5877 + }, + { + "epoch": 0.5745991396167384, + "loss": 0.05807649344205856, + "loss_ce": 0.005586255807429552, + "loss_iou": 0.1416015625, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 569902696, + "step": 5877 + }, + { + "epoch": 0.5746969104419242, + "grad_norm": 5.600892908142785, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 569999296, + "step": 5878 + }, + { + "epoch": 0.5746969104419242, + "loss": 0.07731130719184875, + "loss_ce": 0.003802097635343671, + "loss_iou": 0.37109375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 569999296, + "step": 5878 + }, + { + "epoch": 0.5747946812671099, + "grad_norm": 7.188309556835887, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 570095224, + "step": 5879 + }, + { + "epoch": 0.5747946812671099, + "loss": 0.07342157512903214, + "loss_ce": 0.0039635635912418365, + "loss_iou": 0.2236328125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 570095224, + "step": 5879 + }, + { + "epoch": 0.5748924520922957, + "grad_norm": 12.630583414020396, + "learning_rate": 5e-05, + "loss": 0.0549, + "num_input_tokens_seen": 570191752, + "step": 5880 + }, + { + "epoch": 0.5748924520922957, + "loss": 0.05836807191371918, + "loss_ce": 0.0031236244831234217, + "loss_iou": 0.2177734375, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 570191752, + "step": 5880 + }, + { + "epoch": 0.5749902229174815, + "grad_norm": 5.569915668882367, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 570288784, + "step": 5881 + }, + { + "epoch": 0.5749902229174815, + "loss": 0.1081087663769722, + "loss_ce": 0.008590948767960072, + "loss_iou": 0.208984375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 570288784, + "step": 5881 + }, + { + "epoch": 0.5750879937426672, + "grad_norm": 20.73888699019291, + "learning_rate": 5e-05, + "loss": 0.0688, + "num_input_tokens_seen": 570385980, + "step": 5882 + }, + { + "epoch": 0.5750879937426672, + "loss": 0.057035140693187714, + "loss_ce": 0.0033089453354477882, + "loss_iou": 0.28515625, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 570385980, + "step": 5882 + }, + { + "epoch": 0.575185764567853, + "grad_norm": 27.86175957325236, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 570483452, + "step": 5883 + }, + { + "epoch": 0.575185764567853, + "loss": 0.08978800475597382, + "loss_ce": 0.005040689837187529, + "loss_iou": 0.326171875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 570483452, + "step": 5883 + }, + { + "epoch": 0.5752835353930387, + "grad_norm": 23.94190730152091, + "learning_rate": 5e-05, + "loss": 0.0966, + "num_input_tokens_seen": 570580808, + "step": 5884 + }, + { + "epoch": 0.5752835353930387, + "loss": 0.09587658941745758, + "loss_ce": 0.006063361652195454, + "loss_iou": 0.318359375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 570580808, + "step": 5884 + }, + { + "epoch": 0.5753813062182245, + "grad_norm": 4.5655649383228525, + "learning_rate": 5e-05, + "loss": 0.1187, + "num_input_tokens_seen": 570677720, + "step": 5885 + }, + { + "epoch": 0.5753813062182245, + "loss": 0.10578398406505585, + "loss_ce": 0.00405362993478775, + "loss_iou": 0.365234375, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 570677720, + "step": 5885 + }, + { + "epoch": 0.5754790770434103, + "grad_norm": 5.95526480998531, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 570774688, + "step": 5886 + }, + { + "epoch": 0.5754790770434103, + "loss": 0.0634550005197525, + "loss_ce": 0.005364793352782726, + "loss_iou": 0.314453125, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 570774688, + "step": 5886 + }, + { + "epoch": 0.575576847868596, + "grad_norm": 9.501981043831364, + "learning_rate": 5e-05, + "loss": 0.1036, + "num_input_tokens_seen": 570871288, + "step": 5887 + }, + { + "epoch": 0.575576847868596, + "loss": 0.11807125806808472, + "loss_ce": 0.004606906790286303, + "loss_iou": 0.291015625, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 570871288, + "step": 5887 + }, + { + "epoch": 0.5756746186937818, + "grad_norm": 3.82972248913889, + "learning_rate": 5e-05, + "loss": 0.1184, + "num_input_tokens_seen": 570968360, + "step": 5888 + }, + { + "epoch": 0.5756746186937818, + "loss": 0.12902036309242249, + "loss_ce": 0.006095545366406441, + "loss_iou": 0.396484375, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 570968360, + "step": 5888 + }, + { + "epoch": 0.5757723895189676, + "grad_norm": 6.362231875502696, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 571065272, + "step": 5889 + }, + { + "epoch": 0.5757723895189676, + "loss": 0.04950642213225365, + "loss_ce": 0.0011970987543463707, + "loss_iou": 0.322265625, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 571065272, + "step": 5889 + }, + { + "epoch": 0.5758701603441533, + "grad_norm": 4.158634524583104, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 571162116, + "step": 5890 + }, + { + "epoch": 0.5758701603441533, + "loss": 0.10541079193353653, + "loss_ce": 0.004092431161552668, + "loss_iou": 0.365234375, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 571162116, + "step": 5890 + }, + { + "epoch": 0.5759679311693391, + "grad_norm": 6.36351138075797, + "learning_rate": 5e-05, + "loss": 0.0934, + "num_input_tokens_seen": 571259520, + "step": 5891 + }, + { + "epoch": 0.5759679311693391, + "loss": 0.06758753955364227, + "loss_ce": 0.0024782931432127953, + "loss_iou": 0.314453125, + "loss_num": 0.0130615234375, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 571259520, + "step": 5891 + }, + { + "epoch": 0.5760657019945248, + "grad_norm": 8.516979345079966, + "learning_rate": 5e-05, + "loss": 0.0993, + "num_input_tokens_seen": 571357028, + "step": 5892 + }, + { + "epoch": 0.5760657019945248, + "loss": 0.09746576845645905, + "loss_ce": 0.00441767368465662, + "loss_iou": 0.28515625, + "loss_num": 0.0185546875, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 571357028, + "step": 5892 + }, + { + "epoch": 0.5761634728197106, + "grad_norm": 3.3241394033165124, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 571454328, + "step": 5893 + }, + { + "epoch": 0.5761634728197106, + "loss": 0.08447425812482834, + "loss_ce": 0.003587416373193264, + "loss_iou": 0.26171875, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 571454328, + "step": 5893 + }, + { + "epoch": 0.5762612436448964, + "grad_norm": 3.3266232841822285, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 571550984, + "step": 5894 + }, + { + "epoch": 0.5762612436448964, + "loss": 0.09257584810256958, + "loss_ce": 0.0025184773840010166, + "loss_iou": 0.36328125, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 571550984, + "step": 5894 + }, + { + "epoch": 0.5763590144700821, + "grad_norm": 4.496740199979058, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 571646860, + "step": 5895 + }, + { + "epoch": 0.5763590144700821, + "loss": 0.09782049059867859, + "loss_ce": 0.004650329239666462, + "loss_iou": 0.435546875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 571646860, + "step": 5895 + }, + { + "epoch": 0.5764567852952679, + "grad_norm": 7.824648401588304, + "learning_rate": 5e-05, + "loss": 0.0965, + "num_input_tokens_seen": 571744272, + "step": 5896 + }, + { + "epoch": 0.5764567852952679, + "loss": 0.09064535796642303, + "loss_ce": 0.005302943754941225, + "loss_iou": 0.333984375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 571744272, + "step": 5896 + }, + { + "epoch": 0.5765545561204537, + "grad_norm": 14.702439147164464, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 571841808, + "step": 5897 + }, + { + "epoch": 0.5765545561204537, + "loss": 0.07046487927436829, + "loss_ce": 0.005126742646098137, + "loss_iou": 0.28515625, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 571841808, + "step": 5897 + }, + { + "epoch": 0.5766523269456394, + "grad_norm": 10.208107369355353, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 571938636, + "step": 5898 + }, + { + "epoch": 0.5766523269456394, + "loss": 0.08863352239131927, + "loss_ce": 0.007792458403855562, + "loss_iou": 0.328125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 571938636, + "step": 5898 + }, + { + "epoch": 0.5767500977708252, + "grad_norm": 40.04510054000905, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 572035536, + "step": 5899 + }, + { + "epoch": 0.5767500977708252, + "loss": 0.08253136277198792, + "loss_ce": 0.004932785406708717, + "loss_iou": 0.2373046875, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 572035536, + "step": 5899 + }, + { + "epoch": 0.576847868596011, + "grad_norm": 23.798234020729108, + "learning_rate": 5e-05, + "loss": 0.0848, + "num_input_tokens_seen": 572133100, + "step": 5900 + }, + { + "epoch": 0.576847868596011, + "loss": 0.10509711503982544, + "loss_ce": 0.0039618611335754395, + "loss_iou": 0.361328125, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 572133100, + "step": 5900 + }, + { + "epoch": 0.5769456394211967, + "grad_norm": 3.8315303108031067, + "learning_rate": 5e-05, + "loss": 0.0504, + "num_input_tokens_seen": 572230104, + "step": 5901 + }, + { + "epoch": 0.5769456394211967, + "loss": 0.0400024875998497, + "loss_ce": 0.0035816675517708063, + "loss_iou": 0.236328125, + "loss_num": 0.007293701171875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 572230104, + "step": 5901 + }, + { + "epoch": 0.5770434102463825, + "grad_norm": 4.544320397592646, + "learning_rate": 5e-05, + "loss": 0.0576, + "num_input_tokens_seen": 572327100, + "step": 5902 + }, + { + "epoch": 0.5770434102463825, + "loss": 0.06232214719057083, + "loss_ce": 0.0068259285762906075, + "loss_iou": 0.265625, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 572327100, + "step": 5902 + }, + { + "epoch": 0.5771411810715682, + "grad_norm": 2.0124048806853705, + "learning_rate": 5e-05, + "loss": 0.0622, + "num_input_tokens_seen": 572424784, + "step": 5903 + }, + { + "epoch": 0.5771411810715682, + "loss": 0.06700199842453003, + "loss_ce": 0.003433882026001811, + "loss_iou": 0.1640625, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 572424784, + "step": 5903 + }, + { + "epoch": 0.577238951896754, + "grad_norm": 9.851678443719697, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 572521968, + "step": 5904 + }, + { + "epoch": 0.577238951896754, + "loss": 0.09604997932910919, + "loss_ce": 0.008037285879254341, + "loss_iou": 0.251953125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 572521968, + "step": 5904 + }, + { + "epoch": 0.5773367227219398, + "grad_norm": 11.677409209987289, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 572618236, + "step": 5905 + }, + { + "epoch": 0.5773367227219398, + "loss": 0.06911155581474304, + "loss_ce": 0.0070464350283145905, + "loss_iou": 0.236328125, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 572618236, + "step": 5905 + }, + { + "epoch": 0.5774344935471255, + "grad_norm": 5.331837044367291, + "learning_rate": 5e-05, + "loss": 0.0604, + "num_input_tokens_seen": 572715744, + "step": 5906 + }, + { + "epoch": 0.5774344935471255, + "loss": 0.08101540803909302, + "loss_ce": 0.004217916633933783, + "loss_iou": 0.333984375, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 572715744, + "step": 5906 + }, + { + "epoch": 0.5775322643723113, + "grad_norm": 3.2061015767878334, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 572813768, + "step": 5907 + }, + { + "epoch": 0.5775322643723113, + "loss": 0.09874729812145233, + "loss_ce": 0.008369483053684235, + "loss_iou": 0.314453125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 572813768, + "step": 5907 + }, + { + "epoch": 0.5776300351974971, + "grad_norm": 7.020863292346863, + "learning_rate": 5e-05, + "loss": 0.0892, + "num_input_tokens_seen": 572910232, + "step": 5908 + }, + { + "epoch": 0.5776300351974971, + "loss": 0.06263433396816254, + "loss_ce": 0.006504878401756287, + "loss_iou": 0.224609375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 572910232, + "step": 5908 + }, + { + "epoch": 0.5777278060226828, + "grad_norm": 10.530051833544844, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 573006304, + "step": 5909 + }, + { + "epoch": 0.5777278060226828, + "loss": 0.10340338945388794, + "loss_ce": 0.010237039066851139, + "loss_iou": 0.2216796875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 573006304, + "step": 5909 + }, + { + "epoch": 0.5778255768478686, + "grad_norm": 11.600609012518529, + "learning_rate": 5e-05, + "loss": 0.0613, + "num_input_tokens_seen": 573103628, + "step": 5910 + }, + { + "epoch": 0.5778255768478686, + "loss": 0.05812857300043106, + "loss_ce": 0.004471041262149811, + "loss_iou": 0.279296875, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 573103628, + "step": 5910 + }, + { + "epoch": 0.5779233476730543, + "grad_norm": 7.7751756021005125, + "learning_rate": 5e-05, + "loss": 0.1059, + "num_input_tokens_seen": 573200668, + "step": 5911 + }, + { + "epoch": 0.5779233476730543, + "loss": 0.12671825289726257, + "loss_ce": 0.010286051779985428, + "loss_iou": 0.2890625, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 573200668, + "step": 5911 + }, + { + "epoch": 0.5780211184982401, + "grad_norm": 2.4198286760494225, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 573297060, + "step": 5912 + }, + { + "epoch": 0.5780211184982401, + "loss": 0.062980055809021, + "loss_ce": 0.00566804688423872, + "loss_iou": 0.296875, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 573297060, + "step": 5912 + }, + { + "epoch": 0.5781188893234259, + "grad_norm": 4.8180548667716785, + "learning_rate": 5e-05, + "loss": 0.0916, + "num_input_tokens_seen": 573393512, + "step": 5913 + }, + { + "epoch": 0.5781188893234259, + "loss": 0.11034908890724182, + "loss_ce": 0.007001310586929321, + "loss_iou": 0.259765625, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 573393512, + "step": 5913 + }, + { + "epoch": 0.5782166601486116, + "grad_norm": 4.385466757259202, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 573491112, + "step": 5914 + }, + { + "epoch": 0.5782166601486116, + "loss": 0.10758301615715027, + "loss_ce": 0.007531139999628067, + "loss_iou": 0.291015625, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 573491112, + "step": 5914 + }, + { + "epoch": 0.5783144309737974, + "grad_norm": 16.261847095779153, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 573588188, + "step": 5915 + }, + { + "epoch": 0.5783144309737974, + "loss": 0.0746615007519722, + "loss_ce": 0.0050661638379096985, + "loss_iou": 0.359375, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 573588188, + "step": 5915 + }, + { + "epoch": 0.5784122017989832, + "grad_norm": 5.991445117917446, + "learning_rate": 5e-05, + "loss": 0.1181, + "num_input_tokens_seen": 573685956, + "step": 5916 + }, + { + "epoch": 0.5784122017989832, + "loss": 0.16647426784038544, + "loss_ce": 0.008149070665240288, + "loss_iou": 0.431640625, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 573685956, + "step": 5916 + }, + { + "epoch": 0.5785099726241689, + "grad_norm": 7.651342297755898, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 573782668, + "step": 5917 + }, + { + "epoch": 0.5785099726241689, + "loss": 0.07319265604019165, + "loss_ce": 0.004615842364728451, + "loss_iou": 0.25390625, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 573782668, + "step": 5917 + }, + { + "epoch": 0.5786077434493547, + "grad_norm": 6.957562870099974, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 573880232, + "step": 5918 + }, + { + "epoch": 0.5786077434493547, + "loss": 0.09717035293579102, + "loss_ce": 0.004122260957956314, + "loss_iou": 0.31640625, + "loss_num": 0.0185546875, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 573880232, + "step": 5918 + }, + { + "epoch": 0.5787055142745404, + "grad_norm": 10.451514507964317, + "learning_rate": 5e-05, + "loss": 0.1099, + "num_input_tokens_seen": 573976572, + "step": 5919 + }, + { + "epoch": 0.5787055142745404, + "loss": 0.1488485485315323, + "loss_ce": 0.010909100994467735, + "loss_iou": 0.2421875, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 573976572, + "step": 5919 + }, + { + "epoch": 0.5788032850997262, + "grad_norm": 11.104506374438845, + "learning_rate": 5e-05, + "loss": 0.073, + "num_input_tokens_seen": 574073944, + "step": 5920 + }, + { + "epoch": 0.5788032850997262, + "loss": 0.06545254588127136, + "loss_ce": 0.003227207576856017, + "loss_iou": 0.35546875, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 574073944, + "step": 5920 + }, + { + "epoch": 0.578901055924912, + "grad_norm": 14.532926406237845, + "learning_rate": 5e-05, + "loss": 0.0484, + "num_input_tokens_seen": 574171288, + "step": 5921 + }, + { + "epoch": 0.578901055924912, + "loss": 0.05051357299089432, + "loss_ce": 0.0036156855057924986, + "loss_iou": 0.294921875, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 574171288, + "step": 5921 + }, + { + "epoch": 0.5789988267500977, + "grad_norm": 12.196741036382765, + "learning_rate": 5e-05, + "loss": 0.0998, + "num_input_tokens_seen": 574268120, + "step": 5922 + }, + { + "epoch": 0.5789988267500977, + "loss": 0.1268012821674347, + "loss_ce": 0.0068671926856040955, + "loss_iou": 0.328125, + "loss_num": 0.02392578125, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 574268120, + "step": 5922 + }, + { + "epoch": 0.5790965975752835, + "grad_norm": 6.8503812284813685, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 574365128, + "step": 5923 + }, + { + "epoch": 0.5790965975752835, + "loss": 0.05418151617050171, + "loss_ce": 0.005940849892795086, + "loss_iou": 0.228515625, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 574365128, + "step": 5923 + }, + { + "epoch": 0.5791943684004693, + "grad_norm": 5.7502130900401625, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 574461128, + "step": 5924 + }, + { + "epoch": 0.5791943684004693, + "loss": 0.07107602804899216, + "loss_ce": 0.010346047580242157, + "loss_iou": 0.25, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 574461128, + "step": 5924 + }, + { + "epoch": 0.579292139225655, + "grad_norm": 9.348283604917894, + "learning_rate": 5e-05, + "loss": 0.0993, + "num_input_tokens_seen": 574557076, + "step": 5925 + }, + { + "epoch": 0.579292139225655, + "loss": 0.13292860984802246, + "loss_ce": 0.008294819854199886, + "loss_iou": 0.1884765625, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 574557076, + "step": 5925 + }, + { + "epoch": 0.5793899100508408, + "grad_norm": 8.341608961311076, + "learning_rate": 5e-05, + "loss": 0.0551, + "num_input_tokens_seen": 574654340, + "step": 5926 + }, + { + "epoch": 0.5793899100508408, + "loss": 0.039783693850040436, + "loss_ce": 0.0057565923780202866, + "loss_iou": 0.1845703125, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 574654340, + "step": 5926 + }, + { + "epoch": 0.5794876808760266, + "grad_norm": 21.644310365983223, + "learning_rate": 5e-05, + "loss": 0.0854, + "num_input_tokens_seen": 574751124, + "step": 5927 + }, + { + "epoch": 0.5794876808760266, + "loss": 0.10241426527500153, + "loss_ce": 0.0033084386959671974, + "loss_iou": 0.349609375, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 574751124, + "step": 5927 + }, + { + "epoch": 0.5795854517012123, + "grad_norm": 13.765715079153022, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 574848360, + "step": 5928 + }, + { + "epoch": 0.5795854517012123, + "loss": 0.10725898295640945, + "loss_ce": 0.003789133159443736, + "loss_iou": 0.451171875, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 574848360, + "step": 5928 + }, + { + "epoch": 0.5796832225263981, + "grad_norm": 10.047168577318969, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 574945100, + "step": 5929 + }, + { + "epoch": 0.5796832225263981, + "loss": 0.10079285502433777, + "loss_ce": 0.004532788880169392, + "loss_iou": 0.287109375, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 574945100, + "step": 5929 + }, + { + "epoch": 0.5797809933515838, + "grad_norm": 7.019249720756094, + "learning_rate": 5e-05, + "loss": 0.0726, + "num_input_tokens_seen": 575042060, + "step": 5930 + }, + { + "epoch": 0.5797809933515838, + "loss": 0.10160791873931885, + "loss_ce": 0.005401255562901497, + "loss_iou": 0.326171875, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 575042060, + "step": 5930 + }, + { + "epoch": 0.5798787641767696, + "grad_norm": 4.0900807655823685, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 575139256, + "step": 5931 + }, + { + "epoch": 0.5798787641767696, + "loss": 0.09156119078397751, + "loss_ce": 0.005192626733332872, + "loss_iou": 0.2578125, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 575139256, + "step": 5931 + }, + { + "epoch": 0.5799765350019555, + "grad_norm": 3.59966512041819, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 575236560, + "step": 5932 + }, + { + "epoch": 0.5799765350019555, + "loss": 0.09204995632171631, + "loss_ce": 0.0038541576359421015, + "loss_iou": 0.38671875, + "loss_num": 0.017578125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 575236560, + "step": 5932 + }, + { + "epoch": 0.5800743058271411, + "grad_norm": 2.8916671184965703, + "learning_rate": 5e-05, + "loss": 0.0614, + "num_input_tokens_seen": 575332476, + "step": 5933 + }, + { + "epoch": 0.5800743058271411, + "loss": 0.05539761856198311, + "loss_ce": 0.0026632435619831085, + "loss_iou": 0.3046875, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 575332476, + "step": 5933 + }, + { + "epoch": 0.580172076652327, + "grad_norm": 9.493306806730928, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 575429508, + "step": 5934 + }, + { + "epoch": 0.580172076652327, + "loss": 0.09766434878110886, + "loss_ce": 0.005318159237504005, + "loss_iou": 0.337890625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 575429508, + "step": 5934 + }, + { + "epoch": 0.5802698474775128, + "grad_norm": 26.012144132626535, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 575525852, + "step": 5935 + }, + { + "epoch": 0.5802698474775128, + "loss": 0.08805039525032043, + "loss_ce": 0.007514507509768009, + "loss_iou": 0.166015625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 575525852, + "step": 5935 + }, + { + "epoch": 0.5803676183026985, + "grad_norm": 7.8863974604555365, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 575622620, + "step": 5936 + }, + { + "epoch": 0.5803676183026985, + "loss": 0.08773002028465271, + "loss_ce": 0.007155977189540863, + "loss_iou": 0.1982421875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 575622620, + "step": 5936 + }, + { + "epoch": 0.5804653891278843, + "grad_norm": 5.748892661985866, + "learning_rate": 5e-05, + "loss": 0.0919, + "num_input_tokens_seen": 575719228, + "step": 5937 + }, + { + "epoch": 0.5804653891278843, + "loss": 0.12961751222610474, + "loss_ce": 0.007119952701032162, + "loss_iou": 0.35546875, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 575719228, + "step": 5937 + }, + { + "epoch": 0.58056315995307, + "grad_norm": 9.442500997431543, + "learning_rate": 5e-05, + "loss": 0.094, + "num_input_tokens_seen": 575816552, + "step": 5938 + }, + { + "epoch": 0.58056315995307, + "loss": 0.09377270191907883, + "loss_ce": 0.004653742536902428, + "loss_iou": 0.328125, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 575816552, + "step": 5938 + }, + { + "epoch": 0.5806609307782558, + "grad_norm": 15.535280052574386, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 575913448, + "step": 5939 + }, + { + "epoch": 0.5806609307782558, + "loss": 0.07385872304439545, + "loss_ce": 0.007788165472447872, + "loss_iou": 0.375, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 575913448, + "step": 5939 + }, + { + "epoch": 0.5807587016034416, + "grad_norm": 8.67377355799992, + "learning_rate": 5e-05, + "loss": 0.114, + "num_input_tokens_seen": 576010728, + "step": 5940 + }, + { + "epoch": 0.5807587016034416, + "loss": 0.11832204461097717, + "loss_ce": 0.006826071068644524, + "loss_iou": 0.400390625, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 576010728, + "step": 5940 + }, + { + "epoch": 0.5808564724286273, + "grad_norm": 8.963325090361993, + "learning_rate": 5e-05, + "loss": 0.0872, + "num_input_tokens_seen": 576107456, + "step": 5941 + }, + { + "epoch": 0.5808564724286273, + "loss": 0.10463923215866089, + "loss_ce": 0.0024816407822072506, + "loss_iou": 0.30859375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 576107456, + "step": 5941 + }, + { + "epoch": 0.5809542432538131, + "grad_norm": 12.49341814577625, + "learning_rate": 5e-05, + "loss": 0.0938, + "num_input_tokens_seen": 576204380, + "step": 5942 + }, + { + "epoch": 0.5809542432538131, + "loss": 0.08109964430332184, + "loss_ce": 0.009947909973561764, + "loss_iou": 0.2158203125, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 576204380, + "step": 5942 + }, + { + "epoch": 0.5810520140789989, + "grad_norm": 10.980911572826406, + "learning_rate": 5e-05, + "loss": 0.0847, + "num_input_tokens_seen": 576301752, + "step": 5943 + }, + { + "epoch": 0.5810520140789989, + "loss": 0.0908583477139473, + "loss_ce": 0.004554637707769871, + "loss_iou": 0.2451171875, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 576301752, + "step": 5943 + }, + { + "epoch": 0.5811497849041846, + "grad_norm": 15.040516889128709, + "learning_rate": 5e-05, + "loss": 0.043, + "num_input_tokens_seen": 576397676, + "step": 5944 + }, + { + "epoch": 0.5811497849041846, + "loss": 0.037088215351104736, + "loss_ce": 0.004060564562678337, + "loss_iou": 0.21484375, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 576397676, + "step": 5944 + }, + { + "epoch": 0.5812475557293704, + "grad_norm": 13.783438887362005, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 576495352, + "step": 5945 + }, + { + "epoch": 0.5812475557293704, + "loss": 0.06161472201347351, + "loss_ce": 0.0038068019784986973, + "loss_iou": 0.2890625, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 576495352, + "step": 5945 + }, + { + "epoch": 0.5813453265545562, + "grad_norm": 8.573460650270242, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 576591912, + "step": 5946 + }, + { + "epoch": 0.5813453265545562, + "loss": 0.05826248973608017, + "loss_ce": 0.0033003315329551697, + "loss_iou": 0.3515625, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 576591912, + "step": 5946 + }, + { + "epoch": 0.5814430973797419, + "grad_norm": 5.581268365066857, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 576687864, + "step": 5947 + }, + { + "epoch": 0.5814430973797419, + "loss": 0.08501460403203964, + "loss_ce": 0.0051882509142160416, + "loss_iou": 0.25, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 576687864, + "step": 5947 + }, + { + "epoch": 0.5815408682049277, + "grad_norm": 7.83881438856986, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 576785192, + "step": 5948 + }, + { + "epoch": 0.5815408682049277, + "loss": 0.056109074503183365, + "loss_ce": 0.005465155467391014, + "loss_iou": 0.30078125, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 576785192, + "step": 5948 + }, + { + "epoch": 0.5816386390301134, + "grad_norm": 5.019841726560145, + "learning_rate": 5e-05, + "loss": 0.0631, + "num_input_tokens_seen": 576880792, + "step": 5949 + }, + { + "epoch": 0.5816386390301134, + "loss": 0.07326404750347137, + "loss_ce": 0.0073765963315963745, + "loss_iou": 0.1259765625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 576880792, + "step": 5949 + }, + { + "epoch": 0.5817364098552992, + "grad_norm": 2.746099468109906, + "learning_rate": 5e-05, + "loss": 0.0498, + "num_input_tokens_seen": 576977932, + "step": 5950 + }, + { + "epoch": 0.5817364098552992, + "loss": 0.06255598366260529, + "loss_ce": 0.007212352007627487, + "loss_iou": 0.27734375, + "loss_num": 0.01104736328125, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 576977932, + "step": 5950 + }, + { + "epoch": 0.581834180680485, + "grad_norm": 9.176233328437199, + "learning_rate": 5e-05, + "loss": 0.0595, + "num_input_tokens_seen": 577075072, + "step": 5951 + }, + { + "epoch": 0.581834180680485, + "loss": 0.07454047352075577, + "loss_ce": 0.0035718416329473257, + "loss_iou": 0.26953125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 577075072, + "step": 5951 + }, + { + "epoch": 0.5819319515056707, + "grad_norm": 3.028345233985143, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 577172408, + "step": 5952 + }, + { + "epoch": 0.5819319515056707, + "loss": 0.09313056617975235, + "loss_ce": 0.004980543628334999, + "loss_iou": 0.2373046875, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 577172408, + "step": 5952 + }, + { + "epoch": 0.5820297223308565, + "grad_norm": 16.816768855553025, + "learning_rate": 5e-05, + "loss": 0.1087, + "num_input_tokens_seen": 577268916, + "step": 5953 + }, + { + "epoch": 0.5820297223308565, + "loss": 0.06849677860736847, + "loss_ce": 0.00723274378105998, + "loss_iou": 0.271484375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 577268916, + "step": 5953 + }, + { + "epoch": 0.5821274931560423, + "grad_norm": 6.870722195907779, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 577364748, + "step": 5954 + }, + { + "epoch": 0.5821274931560423, + "loss": 0.10719849169254303, + "loss_ce": 0.0051782261580228806, + "loss_iou": 0.29296875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 577364748, + "step": 5954 + }, + { + "epoch": 0.582225263981228, + "grad_norm": 12.441153975971043, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 577461104, + "step": 5955 + }, + { + "epoch": 0.582225263981228, + "loss": 0.05077466368675232, + "loss_ce": 0.004616828169673681, + "loss_iou": 0.25390625, + "loss_num": 0.00927734375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 577461104, + "step": 5955 + }, + { + "epoch": 0.5823230348064138, + "grad_norm": 3.833704336601323, + "learning_rate": 5e-05, + "loss": 0.0928, + "num_input_tokens_seen": 577558736, + "step": 5956 + }, + { + "epoch": 0.5823230348064138, + "loss": 0.08032930642366409, + "loss_ce": 0.007102375850081444, + "loss_iou": 0.31640625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 577558736, + "step": 5956 + }, + { + "epoch": 0.5824208056315995, + "grad_norm": 11.584536879302272, + "learning_rate": 5e-05, + "loss": 0.0812, + "num_input_tokens_seen": 577656280, + "step": 5957 + }, + { + "epoch": 0.5824208056315995, + "loss": 0.09571047127246857, + "loss_ce": 0.007072168402373791, + "loss_iou": 0.33203125, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 577656280, + "step": 5957 + }, + { + "epoch": 0.5825185764567853, + "grad_norm": 4.869954033205026, + "learning_rate": 5e-05, + "loss": 0.0587, + "num_input_tokens_seen": 577752788, + "step": 5958 + }, + { + "epoch": 0.5825185764567853, + "loss": 0.05560782179236412, + "loss_ce": 0.0021753597538918257, + "loss_iou": 0.3125, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 577752788, + "step": 5958 + }, + { + "epoch": 0.5826163472819711, + "grad_norm": 7.412972941041723, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 577849656, + "step": 5959 + }, + { + "epoch": 0.5826163472819711, + "loss": 0.08150257170200348, + "loss_ce": 0.006291993428021669, + "loss_iou": 0.1796875, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 577849656, + "step": 5959 + }, + { + "epoch": 0.5827141181071568, + "grad_norm": 1.9172992926040335, + "learning_rate": 5e-05, + "loss": 0.0688, + "num_input_tokens_seen": 577947160, + "step": 5960 + }, + { + "epoch": 0.5827141181071568, + "loss": 0.06723219156265259, + "loss_ce": 0.00166518019977957, + "loss_iou": 0.287109375, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 577947160, + "step": 5960 + }, + { + "epoch": 0.5828118889323426, + "grad_norm": 20.63083217989792, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 578044004, + "step": 5961 + }, + { + "epoch": 0.5828118889323426, + "loss": 0.04149797558784485, + "loss_ce": 0.005838185548782349, + "loss_iou": 0.3359375, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 578044004, + "step": 5961 + }, + { + "epoch": 0.5829096597575284, + "grad_norm": 7.859617327446636, + "learning_rate": 5e-05, + "loss": 0.0518, + "num_input_tokens_seen": 578140668, + "step": 5962 + }, + { + "epoch": 0.5829096597575284, + "loss": 0.04147176817059517, + "loss_ce": 0.0034316072706133127, + "loss_iou": 0.35546875, + "loss_num": 0.007598876953125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 578140668, + "step": 5962 + }, + { + "epoch": 0.5830074305827141, + "grad_norm": 8.191663176426534, + "learning_rate": 5e-05, + "loss": 0.0709, + "num_input_tokens_seen": 578237028, + "step": 5963 + }, + { + "epoch": 0.5830074305827141, + "loss": 0.07302629202604294, + "loss_ce": 0.005971543490886688, + "loss_iou": 0.1953125, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 578237028, + "step": 5963 + }, + { + "epoch": 0.5831052014078999, + "grad_norm": 1.5080822237469618, + "learning_rate": 5e-05, + "loss": 0.0643, + "num_input_tokens_seen": 578333528, + "step": 5964 + }, + { + "epoch": 0.5831052014078999, + "loss": 0.04962331801652908, + "loss_ce": 0.004571743309497833, + "loss_iou": 0.21484375, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 578333528, + "step": 5964 + }, + { + "epoch": 0.5832029722330856, + "grad_norm": 24.06725576207895, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 578429880, + "step": 5965 + }, + { + "epoch": 0.5832029722330856, + "loss": 0.07685563713312149, + "loss_ce": 0.002743698190897703, + "loss_iou": 0.265625, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 578429880, + "step": 5965 + }, + { + "epoch": 0.5833007430582714, + "grad_norm": 1.7988305404342513, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 578526052, + "step": 5966 + }, + { + "epoch": 0.5833007430582714, + "loss": 0.06778605282306671, + "loss_ce": 0.0018986047944054008, + "loss_iou": 0.373046875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 578526052, + "step": 5966 + }, + { + "epoch": 0.5833985138834572, + "grad_norm": 3.4614478651459613, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 578622576, + "step": 5967 + }, + { + "epoch": 0.5833985138834572, + "loss": 0.0718805268406868, + "loss_ce": 0.002926058368757367, + "loss_iou": 0.259765625, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 578622576, + "step": 5967 + }, + { + "epoch": 0.5834962847086429, + "grad_norm": 6.495475404753195, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 578718324, + "step": 5968 + }, + { + "epoch": 0.5834962847086429, + "loss": 0.06749598681926727, + "loss_ce": 0.0032869980204850435, + "loss_iou": 0.25, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 578718324, + "step": 5968 + }, + { + "epoch": 0.5835940555338287, + "grad_norm": 2.619817256463256, + "learning_rate": 5e-05, + "loss": 0.0769, + "num_input_tokens_seen": 578815128, + "step": 5969 + }, + { + "epoch": 0.5835940555338287, + "loss": 0.06773731112480164, + "loss_ce": 0.00437519233673811, + "loss_iou": 0.33203125, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 578815128, + "step": 5969 + }, + { + "epoch": 0.5836918263590145, + "grad_norm": 7.467702034722254, + "learning_rate": 5e-05, + "loss": 0.0742, + "num_input_tokens_seen": 578912520, + "step": 5970 + }, + { + "epoch": 0.5836918263590145, + "loss": 0.06806759536266327, + "loss_ce": 0.0022869554813951254, + "loss_iou": 0.263671875, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 578912520, + "step": 5970 + }, + { + "epoch": 0.5837895971842002, + "grad_norm": 8.75342652537677, + "learning_rate": 5e-05, + "loss": 0.0677, + "num_input_tokens_seen": 579009440, + "step": 5971 + }, + { + "epoch": 0.5837895971842002, + "loss": 0.0779334306716919, + "loss_ce": 0.004157187417149544, + "loss_iou": 0.1904296875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 579009440, + "step": 5971 + }, + { + "epoch": 0.583887368009386, + "grad_norm": 8.159448716766741, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 579105872, + "step": 5972 + }, + { + "epoch": 0.583887368009386, + "loss": 0.042171381413936615, + "loss_ce": 0.005275626201182604, + "loss_iou": 0.251953125, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 579105872, + "step": 5972 + }, + { + "epoch": 0.5839851388345718, + "grad_norm": 10.11050180520234, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 579203292, + "step": 5973 + }, + { + "epoch": 0.5839851388345718, + "loss": 0.08958380669355392, + "loss_ce": 0.003432684112340212, + "loss_iou": 0.37109375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 579203292, + "step": 5973 + }, + { + "epoch": 0.5840829096597575, + "grad_norm": 18.46608376627545, + "learning_rate": 5e-05, + "loss": 0.0718, + "num_input_tokens_seen": 579300556, + "step": 5974 + }, + { + "epoch": 0.5840829096597575, + "loss": 0.07079794257879257, + "loss_ce": 0.004284880124032497, + "loss_iou": 0.28515625, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 579300556, + "step": 5974 + }, + { + "epoch": 0.5841806804849433, + "grad_norm": 3.2866565588959964, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 579397040, + "step": 5975 + }, + { + "epoch": 0.5841806804849433, + "loss": 0.08320927619934082, + "loss_ce": 0.003756761783733964, + "loss_iou": 0.31640625, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 579397040, + "step": 5975 + }, + { + "epoch": 0.584278451310129, + "grad_norm": 12.129073381109132, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 579493768, + "step": 5976 + }, + { + "epoch": 0.584278451310129, + "loss": 0.05668942630290985, + "loss_ce": 0.0042602261528372765, + "loss_iou": 0.3046875, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 579493768, + "step": 5976 + }, + { + "epoch": 0.5843762221353148, + "grad_norm": 8.644042935604741, + "learning_rate": 5e-05, + "loss": 0.1108, + "num_input_tokens_seen": 579590360, + "step": 5977 + }, + { + "epoch": 0.5843762221353148, + "loss": 0.07576585561037064, + "loss_ce": 0.006574877072125673, + "loss_iou": 0.341796875, + "loss_num": 0.0137939453125, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 579590360, + "step": 5977 + }, + { + "epoch": 0.5844739929605006, + "grad_norm": 17.603329951422655, + "learning_rate": 5e-05, + "loss": 0.0872, + "num_input_tokens_seen": 579687036, + "step": 5978 + }, + { + "epoch": 0.5844739929605006, + "loss": 0.10921798646450043, + "loss_ce": 0.003505096072331071, + "loss_iou": 0.43359375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 579687036, + "step": 5978 + }, + { + "epoch": 0.5845717637856863, + "grad_norm": 6.852811218351019, + "learning_rate": 5e-05, + "loss": 0.0584, + "num_input_tokens_seen": 579783296, + "step": 5979 + }, + { + "epoch": 0.5845717637856863, + "loss": 0.040533844381570816, + "loss_ce": 0.003920379560440779, + "loss_iou": 0.267578125, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 579783296, + "step": 5979 + }, + { + "epoch": 0.5846695346108721, + "grad_norm": 21.452232072098486, + "learning_rate": 5e-05, + "loss": 0.056, + "num_input_tokens_seen": 579880720, + "step": 5980 + }, + { + "epoch": 0.5846695346108721, + "loss": 0.03206770867109299, + "loss_ce": 0.004754476249217987, + "loss_iou": 0.255859375, + "loss_num": 0.005462646484375, + "loss_xval": 0.02734375, + "num_input_tokens_seen": 579880720, + "step": 5980 + }, + { + "epoch": 0.5847673054360579, + "grad_norm": 10.986343118936821, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 579977708, + "step": 5981 + }, + { + "epoch": 0.5847673054360579, + "loss": 0.06317032873630524, + "loss_ce": 0.005400557070970535, + "loss_iou": 0.3671875, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 579977708, + "step": 5981 + }, + { + "epoch": 0.5848650762612436, + "grad_norm": 15.419894117648541, + "learning_rate": 5e-05, + "loss": 0.1158, + "num_input_tokens_seen": 580074984, + "step": 5982 + }, + { + "epoch": 0.5848650762612436, + "loss": 0.09226033091545105, + "loss_ce": 0.004522289149463177, + "loss_iou": 0.40625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 580074984, + "step": 5982 + }, + { + "epoch": 0.5849628470864294, + "grad_norm": 4.992515201244724, + "learning_rate": 5e-05, + "loss": 0.0448, + "num_input_tokens_seen": 580172608, + "step": 5983 + }, + { + "epoch": 0.5849628470864294, + "loss": 0.03887031972408295, + "loss_ce": 0.0031494919676333666, + "loss_iou": 0.326171875, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 580172608, + "step": 5983 + }, + { + "epoch": 0.5850606179116151, + "grad_norm": 6.004011117284456, + "learning_rate": 5e-05, + "loss": 0.0977, + "num_input_tokens_seen": 580269028, + "step": 5984 + }, + { + "epoch": 0.5850606179116151, + "loss": 0.08166395127773285, + "loss_ce": 0.013869145885109901, + "loss_iou": 0.1943359375, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 580269028, + "step": 5984 + }, + { + "epoch": 0.5851583887368009, + "grad_norm": 3.652813714698842, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 580365928, + "step": 5985 + }, + { + "epoch": 0.5851583887368009, + "loss": 0.04038082808256149, + "loss_ce": 0.005331391468644142, + "loss_iou": 0.369140625, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 580365928, + "step": 5985 + }, + { + "epoch": 0.5852561595619867, + "grad_norm": 8.347813443557229, + "learning_rate": 5e-05, + "loss": 0.0584, + "num_input_tokens_seen": 580463436, + "step": 5986 + }, + { + "epoch": 0.5852561595619867, + "loss": 0.06281083077192307, + "loss_ce": 0.004156044218689203, + "loss_iou": 0.35546875, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 580463436, + "step": 5986 + }, + { + "epoch": 0.5853539303871724, + "grad_norm": 10.63758951451908, + "learning_rate": 5e-05, + "loss": 0.0845, + "num_input_tokens_seen": 580561000, + "step": 5987 + }, + { + "epoch": 0.5853539303871724, + "loss": 0.089135080575943, + "loss_ce": 0.004616646096110344, + "loss_iou": 0.328125, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 580561000, + "step": 5987 + }, + { + "epoch": 0.5854517012123582, + "grad_norm": 50.643812693688005, + "learning_rate": 5e-05, + "loss": 0.0729, + "num_input_tokens_seen": 580658748, + "step": 5988 + }, + { + "epoch": 0.5854517012123582, + "loss": 0.054541245102882385, + "loss_ce": 0.006430282257497311, + "loss_iou": 0.30859375, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 580658748, + "step": 5988 + }, + { + "epoch": 0.585549472037544, + "grad_norm": 44.83662088141413, + "learning_rate": 5e-05, + "loss": 0.0965, + "num_input_tokens_seen": 580755824, + "step": 5989 + }, + { + "epoch": 0.585549472037544, + "loss": 0.08105398714542389, + "loss_ce": 0.01064993254840374, + "loss_iou": 0.400390625, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 580755824, + "step": 5989 + }, + { + "epoch": 0.5856472428627297, + "grad_norm": 10.058614122758655, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 580852768, + "step": 5990 + }, + { + "epoch": 0.5856472428627297, + "loss": 0.08606445789337158, + "loss_ce": 0.006383058149367571, + "loss_iou": 0.298828125, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 580852768, + "step": 5990 + }, + { + "epoch": 0.5857450136879155, + "grad_norm": 5.474614574516262, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 580949384, + "step": 5991 + }, + { + "epoch": 0.5857450136879155, + "loss": 0.05403769388794899, + "loss_ce": 0.0023790616542100906, + "loss_iou": 0.259765625, + "loss_num": 0.01031494140625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 580949384, + "step": 5991 + }, + { + "epoch": 0.5858427845131013, + "grad_norm": 4.527172186391231, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 581045152, + "step": 5992 + }, + { + "epoch": 0.5858427845131013, + "loss": 0.0795430988073349, + "loss_ce": 0.005118353292346001, + "loss_iou": 0.2890625, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 581045152, + "step": 5992 + }, + { + "epoch": 0.585940555338287, + "grad_norm": 2.4020939634098446, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 581142752, + "step": 5993 + }, + { + "epoch": 0.585940555338287, + "loss": 0.08894665539264679, + "loss_ce": 0.0041535599157214165, + "loss_iou": 0.306640625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 581142752, + "step": 5993 + }, + { + "epoch": 0.5860383261634728, + "grad_norm": 10.440885877209544, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 581240596, + "step": 5994 + }, + { + "epoch": 0.5860383261634728, + "loss": 0.07690702378749847, + "loss_ce": 0.011065351776778698, + "loss_iou": 0.283203125, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 581240596, + "step": 5994 + }, + { + "epoch": 0.5861360969886585, + "grad_norm": 2.457157346361157, + "learning_rate": 5e-05, + "loss": 0.0505, + "num_input_tokens_seen": 581337032, + "step": 5995 + }, + { + "epoch": 0.5861360969886585, + "loss": 0.0509425550699234, + "loss_ce": 0.006806508172303438, + "loss_iou": 0.2265625, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 581337032, + "step": 5995 + }, + { + "epoch": 0.5862338678138443, + "grad_norm": 5.244886517082361, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 581433060, + "step": 5996 + }, + { + "epoch": 0.5862338678138443, + "loss": 0.06446553021669388, + "loss_ce": 0.00415516272187233, + "loss_iou": 0.19140625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 581433060, + "step": 5996 + }, + { + "epoch": 0.5863316386390302, + "grad_norm": 4.029447875579611, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 581530236, + "step": 5997 + }, + { + "epoch": 0.5863316386390302, + "loss": 0.08281542360782623, + "loss_ce": 0.011862048879265785, + "loss_iou": 0.37109375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 581530236, + "step": 5997 + }, + { + "epoch": 0.5864294094642158, + "grad_norm": 14.4185643538317, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 581627416, + "step": 5998 + }, + { + "epoch": 0.5864294094642158, + "loss": 0.060068681836128235, + "loss_ce": 0.0058694640174508095, + "loss_iou": 0.27734375, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 581627416, + "step": 5998 + }, + { + "epoch": 0.5865271802894017, + "grad_norm": 15.477729952927827, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 581724588, + "step": 5999 + }, + { + "epoch": 0.5865271802894017, + "loss": 0.06479393690824509, + "loss_ce": 0.006810538005083799, + "loss_iou": 0.267578125, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 581724588, + "step": 5999 + }, + { + "epoch": 0.5866249511145875, + "grad_norm": 1.618461119098475, + "learning_rate": 5e-05, + "loss": 0.0443, + "num_input_tokens_seen": 581821092, + "step": 6000 + }, + { + "epoch": 0.5866249511145875, + "eval_seeclick_CIoU": 0.4954769164323807, + "eval_seeclick_GIoU": 0.4980773627758026, + "eval_seeclick_IoU": 0.5433836281299591, + "eval_seeclick_MAE_all": 0.08267306163907051, + "eval_seeclick_MAE_h": 0.05100431106984615, + "eval_seeclick_MAE_w": 0.11185421422123909, + "eval_seeclick_MAE_x": 0.1161414124071598, + "eval_seeclick_MAE_y": 0.05169234052300453, + "eval_seeclick_NUM_probability": 0.9999972581863403, + "eval_seeclick_inside_bbox": 0.7840909063816071, + "eval_seeclick_loss": 0.29766157269477844, + "eval_seeclick_loss_ce": 0.00958929443731904, + "eval_seeclick_loss_iou": 0.38336181640625, + "eval_seeclick_loss_num": 0.05904388427734375, + "eval_seeclick_loss_xval": 0.295135498046875, + "eval_seeclick_runtime": 79.4822, + "eval_seeclick_samples_per_second": 0.541, + "eval_seeclick_steps_per_second": 0.025, + "num_input_tokens_seen": 581821092, + "step": 6000 + }, + { + "epoch": 0.5866249511145875, + "eval_icons_CIoU": 0.6747377216815948, + "eval_icons_GIoU": 0.6748518347740173, + "eval_icons_IoU": 0.709372341632843, + "eval_icons_MAE_all": 0.06571006588637829, + "eval_icons_MAE_h": 0.06391297187656164, + "eval_icons_MAE_w": 0.06964118033647537, + "eval_icons_MAE_x": 0.06828926131129265, + "eval_icons_MAE_y": 0.060996849089860916, + "eval_icons_NUM_probability": 0.9999958276748657, + "eval_icons_inside_bbox": 0.8350694477558136, + "eval_icons_loss": 0.1977168321609497, + "eval_icons_loss_ce": 1.9429371604928747e-06, + "eval_icons_loss_iou": 0.302978515625, + "eval_icons_loss_num": 0.0431976318359375, + "eval_icons_loss_xval": 0.2159881591796875, + "eval_icons_runtime": 88.3429, + "eval_icons_samples_per_second": 0.566, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 581821092, + "step": 6000 + }, + { + "epoch": 0.5866249511145875, + "eval_screenspot_CIoU": 0.2692934473355611, + "eval_screenspot_GIoU": 0.25003613779942196, + "eval_screenspot_IoU": 0.37971388300259906, + "eval_screenspot_MAE_all": 0.1848317633072535, + "eval_screenspot_MAE_h": 0.1260831505060196, + "eval_screenspot_MAE_w": 0.24588205416997275, + "eval_screenspot_MAE_x": 0.24300687263409296, + "eval_screenspot_MAE_y": 0.12435497840245564, + "eval_screenspot_NUM_probability": 0.9999959667523702, + "eval_screenspot_inside_bbox": 0.5854166746139526, + "eval_screenspot_loss": 0.6353309154510498, + "eval_screenspot_loss_ce": 0.02685594993333022, + "eval_screenspot_loss_iou": 0.3082682291666667, + "eval_screenspot_loss_num": 0.12523396809895834, + "eval_screenspot_loss_xval": 0.62615966796875, + "eval_screenspot_runtime": 156.372, + "eval_screenspot_samples_per_second": 0.569, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 581821092, + "step": 6000 + }, + { + "epoch": 0.5866249511145875, + "eval_compot_CIoU": 0.48650504648685455, + "eval_compot_GIoU": 0.4660881608724594, + "eval_compot_IoU": 0.5425743162631989, + "eval_compot_MAE_all": 0.09120092168450356, + "eval_compot_MAE_h": 0.0908254086971283, + "eval_compot_MAE_w": 0.09470296837389469, + "eval_compot_MAE_x": 0.08591526374220848, + "eval_compot_MAE_y": 0.0933600589632988, + "eval_compot_NUM_probability": 0.9999851882457733, + "eval_compot_inside_bbox": 0.7638888955116272, + "eval_compot_loss": 0.30091020464897156, + "eval_compot_loss_ce": 0.01928346138447523, + "eval_compot_loss_iou": 0.413330078125, + "eval_compot_loss_num": 0.0515899658203125, + "eval_compot_loss_xval": 0.2579803466796875, + "eval_compot_runtime": 88.8726, + "eval_compot_samples_per_second": 0.563, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 581821092, + "step": 6000 + }, + { + "epoch": 0.5866249511145875, + "loss": 0.24629893898963928, + "loss_ce": 0.020011086016893387, + "loss_iou": 0.40234375, + "loss_num": 0.045166015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 581821092, + "step": 6000 + }, + { + "epoch": 0.5867227219397732, + "grad_norm": 35.21942828167241, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 581917584, + "step": 6001 + }, + { + "epoch": 0.5867227219397732, + "loss": 0.06287866085767746, + "loss_ce": 0.0032625733874738216, + "loss_iou": 0.216796875, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 581917584, + "step": 6001 + }, + { + "epoch": 0.586820492764959, + "grad_norm": 3.742936466422029, + "learning_rate": 5e-05, + "loss": 0.0787, + "num_input_tokens_seen": 582014640, + "step": 6002 + }, + { + "epoch": 0.586820492764959, + "loss": 0.0884929746389389, + "loss_ce": 0.003959287889301777, + "loss_iou": 0.27734375, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 582014640, + "step": 6002 + }, + { + "epoch": 0.5869182635901447, + "grad_norm": 6.394553057625527, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 582112212, + "step": 6003 + }, + { + "epoch": 0.5869182635901447, + "loss": 0.06657004356384277, + "loss_ce": 0.007274392526596785, + "loss_iou": 0.30859375, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 582112212, + "step": 6003 + }, + { + "epoch": 0.5870160344153305, + "grad_norm": 15.32509468777365, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 582209244, + "step": 6004 + }, + { + "epoch": 0.5870160344153305, + "loss": 0.07822099328041077, + "loss_ce": 0.006916671060025692, + "loss_iou": 0.275390625, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 582209244, + "step": 6004 + }, + { + "epoch": 0.5871138052405163, + "grad_norm": 5.642176263168576, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 582306028, + "step": 6005 + }, + { + "epoch": 0.5871138052405163, + "loss": 0.06406429409980774, + "loss_ce": 0.004349023103713989, + "loss_iou": 0.333984375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 582306028, + "step": 6005 + }, + { + "epoch": 0.587211576065702, + "grad_norm": 7.725488381104926, + "learning_rate": 5e-05, + "loss": 0.0875, + "num_input_tokens_seen": 582402596, + "step": 6006 + }, + { + "epoch": 0.587211576065702, + "loss": 0.08070416003465652, + "loss_ce": 0.005203671753406525, + "loss_iou": 0.365234375, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 582402596, + "step": 6006 + }, + { + "epoch": 0.5873093468908878, + "grad_norm": 4.017518457377324, + "learning_rate": 5e-05, + "loss": 0.1016, + "num_input_tokens_seen": 582498916, + "step": 6007 + }, + { + "epoch": 0.5873093468908878, + "loss": 0.08524151146411896, + "loss_ce": 0.005651670973747969, + "loss_iou": 0.3203125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 582498916, + "step": 6007 + }, + { + "epoch": 0.5874071177160736, + "grad_norm": 6.125799855171644, + "learning_rate": 5e-05, + "loss": 0.0917, + "num_input_tokens_seen": 582595860, + "step": 6008 + }, + { + "epoch": 0.5874071177160736, + "loss": 0.06745453178882599, + "loss_ce": 0.007731623947620392, + "loss_iou": 0.359375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 582595860, + "step": 6008 + }, + { + "epoch": 0.5875048885412593, + "grad_norm": 2.2573870404021017, + "learning_rate": 5e-05, + "loss": 0.1069, + "num_input_tokens_seen": 582693132, + "step": 6009 + }, + { + "epoch": 0.5875048885412593, + "loss": 0.13401269912719727, + "loss_ce": 0.007166385650634766, + "loss_iou": 0.279296875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 582693132, + "step": 6009 + }, + { + "epoch": 0.5876026593664451, + "grad_norm": 9.869502242309272, + "learning_rate": 5e-05, + "loss": 0.0977, + "num_input_tokens_seen": 582790892, + "step": 6010 + }, + { + "epoch": 0.5876026593664451, + "loss": 0.12576261162757874, + "loss_ce": 0.008926074020564556, + "loss_iou": 0.314453125, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 582790892, + "step": 6010 + }, + { + "epoch": 0.5877004301916308, + "grad_norm": 5.648706522710391, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 582888020, + "step": 6011 + }, + { + "epoch": 0.5877004301916308, + "loss": 0.04236960411071777, + "loss_ce": 0.001643896452151239, + "loss_iou": 0.306640625, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 582888020, + "step": 6011 + }, + { + "epoch": 0.5877982010168166, + "grad_norm": 22.206416601659033, + "learning_rate": 5e-05, + "loss": 0.107, + "num_input_tokens_seen": 582985100, + "step": 6012 + }, + { + "epoch": 0.5877982010168166, + "loss": 0.09636896103620529, + "loss_ce": 0.002588448114693165, + "loss_iou": 0.298828125, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 582985100, + "step": 6012 + }, + { + "epoch": 0.5878959718420024, + "grad_norm": 6.596715145878203, + "learning_rate": 5e-05, + "loss": 0.1025, + "num_input_tokens_seen": 583082396, + "step": 6013 + }, + { + "epoch": 0.5878959718420024, + "loss": 0.08190829306840897, + "loss_ce": 0.0054617589339613914, + "loss_iou": 0.357421875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 583082396, + "step": 6013 + }, + { + "epoch": 0.5879937426671881, + "grad_norm": 13.213624591863056, + "learning_rate": 5e-05, + "loss": 0.0867, + "num_input_tokens_seen": 583179400, + "step": 6014 + }, + { + "epoch": 0.5879937426671881, + "loss": 0.06702379882335663, + "loss_ce": 0.004981563426554203, + "loss_iou": 0.283203125, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 583179400, + "step": 6014 + }, + { + "epoch": 0.5880915134923739, + "grad_norm": 15.388668365752983, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 583277044, + "step": 6015 + }, + { + "epoch": 0.5880915134923739, + "loss": 0.081444650888443, + "loss_ce": 0.00920954905450344, + "loss_iou": 0.33203125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 583277044, + "step": 6015 + }, + { + "epoch": 0.5881892843175597, + "grad_norm": 5.644985601731956, + "learning_rate": 5e-05, + "loss": 0.0946, + "num_input_tokens_seen": 583373432, + "step": 6016 + }, + { + "epoch": 0.5881892843175597, + "loss": 0.09054628014564514, + "loss_ce": 0.01019350066781044, + "loss_iou": 0.138671875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 583373432, + "step": 6016 + }, + { + "epoch": 0.5882870551427454, + "grad_norm": 3.3431152907185133, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 583471464, + "step": 6017 + }, + { + "epoch": 0.5882870551427454, + "loss": 0.07796955853700638, + "loss_ce": 0.007367145270109177, + "loss_iou": 0.349609375, + "loss_num": 0.01409912109375, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 583471464, + "step": 6017 + }, + { + "epoch": 0.5883848259679312, + "grad_norm": 5.273426397285317, + "learning_rate": 5e-05, + "loss": 0.0914, + "num_input_tokens_seen": 583568976, + "step": 6018 + }, + { + "epoch": 0.5883848259679312, + "loss": 0.07679246366024017, + "loss_ce": 0.006693585775792599, + "loss_iou": 0.30078125, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 583568976, + "step": 6018 + }, + { + "epoch": 0.588482596793117, + "grad_norm": 5.704806788006516, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 583666480, + "step": 6019 + }, + { + "epoch": 0.588482596793117, + "loss": 0.07728831470012665, + "loss_ce": 0.0061137001030147076, + "loss_iou": 0.279296875, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 583666480, + "step": 6019 + }, + { + "epoch": 0.5885803676183027, + "grad_norm": 4.413272532211082, + "learning_rate": 5e-05, + "loss": 0.0662, + "num_input_tokens_seen": 583763240, + "step": 6020 + }, + { + "epoch": 0.5885803676183027, + "loss": 0.052317842841148376, + "loss_ce": 0.0069458335638046265, + "loss_iou": 0.189453125, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 583763240, + "step": 6020 + }, + { + "epoch": 0.5886781384434885, + "grad_norm": 4.023671807942881, + "learning_rate": 5e-05, + "loss": 0.1035, + "num_input_tokens_seen": 583859216, + "step": 6021 + }, + { + "epoch": 0.5886781384434885, + "loss": 0.07155913859605789, + "loss_ce": 0.00883787963539362, + "loss_iou": 0.26953125, + "loss_num": 0.0125732421875, + "loss_xval": 0.0625, + "num_input_tokens_seen": 583859216, + "step": 6021 + }, + { + "epoch": 0.5887759092686742, + "grad_norm": 3.6626406576477963, + "learning_rate": 5e-05, + "loss": 0.0632, + "num_input_tokens_seen": 583956296, + "step": 6022 + }, + { + "epoch": 0.5887759092686742, + "loss": 0.03541070595383644, + "loss_ce": 0.0014370138524100184, + "loss_iou": 0.287109375, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 583956296, + "step": 6022 + }, + { + "epoch": 0.58887368009386, + "grad_norm": 4.362814564888464, + "learning_rate": 5e-05, + "loss": 0.0951, + "num_input_tokens_seen": 584053420, + "step": 6023 + }, + { + "epoch": 0.58887368009386, + "loss": 0.07615463435649872, + "loss_ce": 0.006154940463602543, + "loss_iou": 0.21875, + "loss_num": 0.0140380859375, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 584053420, + "step": 6023 + }, + { + "epoch": 0.5889714509190458, + "grad_norm": 8.045827379025202, + "learning_rate": 5e-05, + "loss": 0.0532, + "num_input_tokens_seen": 584150032, + "step": 6024 + }, + { + "epoch": 0.5889714509190458, + "loss": 0.06460458040237427, + "loss_ce": 0.004885490518063307, + "loss_iou": 0.15234375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 584150032, + "step": 6024 + }, + { + "epoch": 0.5890692217442315, + "grad_norm": 9.734343798577253, + "learning_rate": 5e-05, + "loss": 0.092, + "num_input_tokens_seen": 584246976, + "step": 6025 + }, + { + "epoch": 0.5890692217442315, + "loss": 0.09796636551618576, + "loss_ce": 0.006596735678613186, + "loss_iou": 0.220703125, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 584246976, + "step": 6025 + }, + { + "epoch": 0.5891669925694173, + "grad_norm": 10.675814609368308, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 584343440, + "step": 6026 + }, + { + "epoch": 0.5891669925694173, + "loss": 0.05800803750753403, + "loss_ce": 0.006410445552319288, + "loss_iou": 0.2353515625, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 584343440, + "step": 6026 + }, + { + "epoch": 0.5892647633946031, + "grad_norm": 9.301234902055059, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 584440064, + "step": 6027 + }, + { + "epoch": 0.5892647633946031, + "loss": 0.10867410153150558, + "loss_ce": 0.003357938025146723, + "loss_iou": 0.3125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 584440064, + "step": 6027 + }, + { + "epoch": 0.5893625342197888, + "grad_norm": 11.196506027871255, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 584536648, + "step": 6028 + }, + { + "epoch": 0.5893625342197888, + "loss": 0.056571900844573975, + "loss_ce": 0.0020369919948279858, + "loss_iou": 0.193359375, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 584536648, + "step": 6028 + }, + { + "epoch": 0.5894603050449746, + "grad_norm": 5.539828441598055, + "learning_rate": 5e-05, + "loss": 0.0916, + "num_input_tokens_seen": 584633692, + "step": 6029 + }, + { + "epoch": 0.5894603050449746, + "loss": 0.0673174187541008, + "loss_ce": 0.004214697517454624, + "loss_iou": 0.32421875, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 584633692, + "step": 6029 + }, + { + "epoch": 0.5895580758701603, + "grad_norm": 5.890364325859307, + "learning_rate": 5e-05, + "loss": 0.0863, + "num_input_tokens_seen": 584730596, + "step": 6030 + }, + { + "epoch": 0.5895580758701603, + "loss": 0.1161775216460228, + "loss_ce": 0.0054521216079592705, + "loss_iou": 0.205078125, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 584730596, + "step": 6030 + }, + { + "epoch": 0.5896558466953461, + "grad_norm": 4.154647453368512, + "learning_rate": 5e-05, + "loss": 0.0956, + "num_input_tokens_seen": 584827632, + "step": 6031 + }, + { + "epoch": 0.5896558466953461, + "loss": 0.08731567859649658, + "loss_ce": 0.006596686318516731, + "loss_iou": 0.369140625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 584827632, + "step": 6031 + }, + { + "epoch": 0.5897536175205319, + "grad_norm": 7.478526448842486, + "learning_rate": 5e-05, + "loss": 0.0964, + "num_input_tokens_seen": 584923100, + "step": 6032 + }, + { + "epoch": 0.5897536175205319, + "loss": 0.11878400295972824, + "loss_ce": 0.005914740730077028, + "loss_iou": 0.26171875, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 584923100, + "step": 6032 + }, + { + "epoch": 0.5898513883457176, + "grad_norm": 13.349354718183157, + "learning_rate": 5e-05, + "loss": 0.0416, + "num_input_tokens_seen": 585020676, + "step": 6033 + }, + { + "epoch": 0.5898513883457176, + "loss": 0.034740421921014786, + "loss_ce": 0.0022849757224321365, + "loss_iou": 0.33203125, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 585020676, + "step": 6033 + }, + { + "epoch": 0.5899491591709034, + "grad_norm": 60.1594123690301, + "learning_rate": 5e-05, + "loss": 0.1007, + "num_input_tokens_seen": 585117340, + "step": 6034 + }, + { + "epoch": 0.5899491591709034, + "loss": 0.09928809106349945, + "loss_ce": 0.004561528563499451, + "loss_iou": 0.326171875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 585117340, + "step": 6034 + }, + { + "epoch": 0.5900469299960892, + "grad_norm": 27.12423881625777, + "learning_rate": 5e-05, + "loss": 0.0742, + "num_input_tokens_seen": 585214260, + "step": 6035 + }, + { + "epoch": 0.5900469299960892, + "loss": 0.08327552676200867, + "loss_ce": 0.009667128324508667, + "loss_iou": 0.248046875, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 585214260, + "step": 6035 + }, + { + "epoch": 0.5901447008212749, + "grad_norm": 3.2886694800314156, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 585311032, + "step": 6036 + }, + { + "epoch": 0.5901447008212749, + "loss": 0.09562773257493973, + "loss_ce": 0.010361622087657452, + "loss_iou": 0.28125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 585311032, + "step": 6036 + }, + { + "epoch": 0.5902424716464607, + "grad_norm": 6.410962139509277, + "learning_rate": 5e-05, + "loss": 0.0984, + "num_input_tokens_seen": 585408088, + "step": 6037 + }, + { + "epoch": 0.5902424716464607, + "loss": 0.09954977035522461, + "loss_ce": 0.004884236957877874, + "loss_iou": 0.3671875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 585408088, + "step": 6037 + }, + { + "epoch": 0.5903402424716465, + "grad_norm": 5.130697713924359, + "learning_rate": 5e-05, + "loss": 0.0873, + "num_input_tokens_seen": 585504624, + "step": 6038 + }, + { + "epoch": 0.5903402424716465, + "loss": 0.09501562267541885, + "loss_ce": 0.005873773247003555, + "loss_iou": 0.25, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 585504624, + "step": 6038 + }, + { + "epoch": 0.5904380132968322, + "grad_norm": 7.016543563483057, + "learning_rate": 5e-05, + "loss": 0.0547, + "num_input_tokens_seen": 585601760, + "step": 6039 + }, + { + "epoch": 0.5904380132968322, + "loss": 0.06375107169151306, + "loss_ce": 0.0037306207232177258, + "loss_iou": 0.310546875, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 585601760, + "step": 6039 + }, + { + "epoch": 0.590535784122018, + "grad_norm": 19.195478198409194, + "learning_rate": 5e-05, + "loss": 0.0922, + "num_input_tokens_seen": 585698852, + "step": 6040 + }, + { + "epoch": 0.590535784122018, + "loss": 0.070229172706604, + "loss_ce": 0.003731370437890291, + "loss_iou": 0.322265625, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 585698852, + "step": 6040 + }, + { + "epoch": 0.5906335549472037, + "grad_norm": 11.666592496628297, + "learning_rate": 5e-05, + "loss": 0.0974, + "num_input_tokens_seen": 585795516, + "step": 6041 + }, + { + "epoch": 0.5906335549472037, + "loss": 0.09628265351057053, + "loss_ce": 0.007522274740040302, + "loss_iou": 0.3203125, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 585795516, + "step": 6041 + }, + { + "epoch": 0.5907313257723895, + "grad_norm": 5.2413109038527175, + "learning_rate": 5e-05, + "loss": 0.1035, + "num_input_tokens_seen": 585892048, + "step": 6042 + }, + { + "epoch": 0.5907313257723895, + "loss": 0.07924403995275497, + "loss_ce": 0.005513573065400124, + "loss_iou": 0.287109375, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 585892048, + "step": 6042 + }, + { + "epoch": 0.5908290965975753, + "grad_norm": 1.5445105598807145, + "learning_rate": 5e-05, + "loss": 0.0513, + "num_input_tokens_seen": 585989320, + "step": 6043 + }, + { + "epoch": 0.5908290965975753, + "loss": 0.05714414268732071, + "loss_ce": 0.007858255878090858, + "loss_iou": 0.3046875, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 585989320, + "step": 6043 + }, + { + "epoch": 0.590926867422761, + "grad_norm": 13.45527820904367, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 586084996, + "step": 6044 + }, + { + "epoch": 0.590926867422761, + "loss": 0.06540136784315109, + "loss_ce": 0.007231045048683882, + "loss_iou": 0.154296875, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 586084996, + "step": 6044 + }, + { + "epoch": 0.5910246382479468, + "grad_norm": 22.555808165048035, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 586181712, + "step": 6045 + }, + { + "epoch": 0.5910246382479468, + "loss": 0.08908252418041229, + "loss_ce": 0.006791871041059494, + "loss_iou": 0.244140625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 586181712, + "step": 6045 + }, + { + "epoch": 0.5911224090731326, + "grad_norm": 10.074403654799523, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 586279584, + "step": 6046 + }, + { + "epoch": 0.5911224090731326, + "loss": 0.05817033722996712, + "loss_ce": 0.005195639096200466, + "loss_iou": 0.29296875, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 586279584, + "step": 6046 + }, + { + "epoch": 0.5912201798983183, + "grad_norm": 6.393803977011716, + "learning_rate": 5e-05, + "loss": 0.0834, + "num_input_tokens_seen": 586376908, + "step": 6047 + }, + { + "epoch": 0.5912201798983183, + "loss": 0.06943626701831818, + "loss_ce": 0.0014736225130036473, + "loss_iou": 0.3828125, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 586376908, + "step": 6047 + }, + { + "epoch": 0.5913179507235041, + "grad_norm": 4.46383866253166, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 586473540, + "step": 6048 + }, + { + "epoch": 0.5913179507235041, + "loss": 0.07939587533473969, + "loss_ce": 0.0075117177329957485, + "loss_iou": 0.267578125, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 586473540, + "step": 6048 + }, + { + "epoch": 0.5914157215486898, + "grad_norm": 4.196288446593526, + "learning_rate": 5e-05, + "loss": 0.0499, + "num_input_tokens_seen": 586569512, + "step": 6049 + }, + { + "epoch": 0.5914157215486898, + "loss": 0.053238727152347565, + "loss_ce": 0.004532675724476576, + "loss_iou": 0.271484375, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 586569512, + "step": 6049 + }, + { + "epoch": 0.5915134923738756, + "grad_norm": 6.440729785893371, + "learning_rate": 5e-05, + "loss": 0.0644, + "num_input_tokens_seen": 586668172, + "step": 6050 + }, + { + "epoch": 0.5915134923738756, + "loss": 0.06908847391605377, + "loss_ce": 0.0033688750118017197, + "loss_iou": 0.3125, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 586668172, + "step": 6050 + }, + { + "epoch": 0.5916112631990614, + "grad_norm": 8.291683442195481, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 586765760, + "step": 6051 + }, + { + "epoch": 0.5916112631990614, + "loss": 0.0545111820101738, + "loss_ce": 0.006682507693767548, + "loss_iou": 0.2412109375, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 586765760, + "step": 6051 + }, + { + "epoch": 0.5917090340242471, + "grad_norm": 9.735616934336269, + "learning_rate": 5e-05, + "loss": 0.1156, + "num_input_tokens_seen": 586862572, + "step": 6052 + }, + { + "epoch": 0.5917090340242471, + "loss": 0.09087512642145157, + "loss_ce": 0.006356693804264069, + "loss_iou": 0.322265625, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 586862572, + "step": 6052 + }, + { + "epoch": 0.5918068048494329, + "grad_norm": 15.577187050400568, + "learning_rate": 5e-05, + "loss": 0.0743, + "num_input_tokens_seen": 586958376, + "step": 6053 + }, + { + "epoch": 0.5918068048494329, + "loss": 0.09803402423858643, + "loss_ce": 0.003002288518473506, + "loss_iou": 0.296875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 586958376, + "step": 6053 + }, + { + "epoch": 0.5919045756746187, + "grad_norm": 3.8027873908017034, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 587055296, + "step": 6054 + }, + { + "epoch": 0.5919045756746187, + "loss": 0.08889861404895782, + "loss_ce": 0.005448304582387209, + "loss_iou": 0.37890625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 587055296, + "step": 6054 + }, + { + "epoch": 0.5920023464998044, + "grad_norm": 27.255612904033665, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 587152860, + "step": 6055 + }, + { + "epoch": 0.5920023464998044, + "loss": 0.10230650007724762, + "loss_ce": 0.004741804674267769, + "loss_iou": 0.373046875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 587152860, + "step": 6055 + }, + { + "epoch": 0.5921001173249902, + "grad_norm": 3.4707254461752255, + "learning_rate": 5e-05, + "loss": 0.0931, + "num_input_tokens_seen": 587249368, + "step": 6056 + }, + { + "epoch": 0.5921001173249902, + "loss": 0.09968466311693192, + "loss_ce": 0.002829501871019602, + "loss_iou": 0.2431640625, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 587249368, + "step": 6056 + }, + { + "epoch": 0.5921978881501759, + "grad_norm": 8.667795966904025, + "learning_rate": 5e-05, + "loss": 0.0908, + "num_input_tokens_seen": 587346496, + "step": 6057 + }, + { + "epoch": 0.5921978881501759, + "loss": 0.07066822797060013, + "loss_ce": 0.004780776798725128, + "loss_iou": 0.302734375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 587346496, + "step": 6057 + }, + { + "epoch": 0.5922956589753617, + "grad_norm": 6.244914136925357, + "learning_rate": 5e-05, + "loss": 0.1079, + "num_input_tokens_seen": 587444128, + "step": 6058 + }, + { + "epoch": 0.5922956589753617, + "loss": 0.11928987503051758, + "loss_ce": 0.005550863221287727, + "loss_iou": 0.330078125, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 587444128, + "step": 6058 + }, + { + "epoch": 0.5923934298005475, + "grad_norm": 6.775460144073461, + "learning_rate": 5e-05, + "loss": 0.0584, + "num_input_tokens_seen": 587541600, + "step": 6059 + }, + { + "epoch": 0.5923934298005475, + "loss": 0.05696626007556915, + "loss_ce": 0.002858594059944153, + "loss_iou": 0.251953125, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 587541600, + "step": 6059 + }, + { + "epoch": 0.5924912006257332, + "grad_norm": 12.353489004711982, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 587639440, + "step": 6060 + }, + { + "epoch": 0.5924912006257332, + "loss": 0.08397208154201508, + "loss_ce": 0.002810589736327529, + "loss_iou": 0.28125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 587639440, + "step": 6060 + }, + { + "epoch": 0.592588971450919, + "grad_norm": 15.608292565168856, + "learning_rate": 5e-05, + "loss": 0.1288, + "num_input_tokens_seen": 587735644, + "step": 6061 + }, + { + "epoch": 0.592588971450919, + "loss": 0.11790208518505096, + "loss_ce": 0.006329814437776804, + "loss_iou": 0.5234375, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 587735644, + "step": 6061 + }, + { + "epoch": 0.5926867422761048, + "grad_norm": 9.395184153362434, + "learning_rate": 5e-05, + "loss": 0.0647, + "num_input_tokens_seen": 587832688, + "step": 6062 + }, + { + "epoch": 0.5926867422761048, + "loss": 0.052019812166690826, + "loss_ce": 0.0030390999745577574, + "loss_iou": 0.1787109375, + "loss_num": 0.009765625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 587832688, + "step": 6062 + }, + { + "epoch": 0.5927845131012905, + "grad_norm": 4.971232927854038, + "learning_rate": 5e-05, + "loss": 0.0517, + "num_input_tokens_seen": 587930196, + "step": 6063 + }, + { + "epoch": 0.5927845131012905, + "loss": 0.04075121507048607, + "loss_ce": 0.0032031501177698374, + "loss_iou": 0.322265625, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 587930196, + "step": 6063 + }, + { + "epoch": 0.5928822839264764, + "grad_norm": 5.630181467923369, + "learning_rate": 5e-05, + "loss": 0.0573, + "num_input_tokens_seen": 588027188, + "step": 6064 + }, + { + "epoch": 0.5928822839264764, + "loss": 0.03665195032954216, + "loss_ce": 0.0036471900530159473, + "loss_iou": 0.26953125, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 588027188, + "step": 6064 + }, + { + "epoch": 0.5929800547516622, + "grad_norm": 23.07322036119421, + "learning_rate": 5e-05, + "loss": 0.1327, + "num_input_tokens_seen": 588124188, + "step": 6065 + }, + { + "epoch": 0.5929800547516622, + "loss": 0.17263203859329224, + "loss_ce": 0.00869159959256649, + "loss_iou": 0.2265625, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 588124188, + "step": 6065 + }, + { + "epoch": 0.5930778255768479, + "grad_norm": 6.508236680902453, + "learning_rate": 5e-05, + "loss": 0.083, + "num_input_tokens_seen": 588220968, + "step": 6066 + }, + { + "epoch": 0.5930778255768479, + "loss": 0.09646173566579819, + "loss_ce": 0.0033831156324595213, + "loss_iou": 0.232421875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 588220968, + "step": 6066 + }, + { + "epoch": 0.5931755964020337, + "grad_norm": 6.590562862000458, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 588318648, + "step": 6067 + }, + { + "epoch": 0.5931755964020337, + "loss": 0.11907200515270233, + "loss_ce": 0.005355884321033955, + "loss_iou": 0.302734375, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 588318648, + "step": 6067 + }, + { + "epoch": 0.5932733672272194, + "grad_norm": 15.310580908748552, + "learning_rate": 5e-05, + "loss": 0.0573, + "num_input_tokens_seen": 588415748, + "step": 6068 + }, + { + "epoch": 0.5932733672272194, + "loss": 0.05161280930042267, + "loss_ce": 0.005905105732381344, + "loss_iou": 0.296875, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 588415748, + "step": 6068 + }, + { + "epoch": 0.5933711380524052, + "grad_norm": 11.488591119440326, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 588512204, + "step": 6069 + }, + { + "epoch": 0.5933711380524052, + "loss": 0.0886269211769104, + "loss_ce": 0.010799473151564598, + "loss_iou": 0.28515625, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 588512204, + "step": 6069 + }, + { + "epoch": 0.593468908877591, + "grad_norm": 9.021058787345517, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 588608892, + "step": 6070 + }, + { + "epoch": 0.593468908877591, + "loss": 0.06322364509105682, + "loss_ce": 0.004141608718782663, + "loss_iou": 0.34765625, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 588608892, + "step": 6070 + }, + { + "epoch": 0.5935666797027767, + "grad_norm": 3.301276821555707, + "learning_rate": 5e-05, + "loss": 0.0984, + "num_input_tokens_seen": 588705012, + "step": 6071 + }, + { + "epoch": 0.5935666797027767, + "loss": 0.10022522509098053, + "loss_ce": 0.003652348183095455, + "loss_iou": 0.279296875, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 588705012, + "step": 6071 + }, + { + "epoch": 0.5936644505279625, + "grad_norm": 4.431449372668451, + "learning_rate": 5e-05, + "loss": 0.1013, + "num_input_tokens_seen": 588803040, + "step": 6072 + }, + { + "epoch": 0.5936644505279625, + "loss": 0.11143947392702103, + "loss_ce": 0.008457904681563377, + "loss_iou": 0.359375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 588803040, + "step": 6072 + }, + { + "epoch": 0.5937622213531483, + "grad_norm": 14.584397305599678, + "learning_rate": 5e-05, + "loss": 0.0866, + "num_input_tokens_seen": 588899980, + "step": 6073 + }, + { + "epoch": 0.5937622213531483, + "loss": 0.07163534313440323, + "loss_ce": 0.0029860534705221653, + "loss_iou": 0.34765625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 588899980, + "step": 6073 + }, + { + "epoch": 0.593859992178334, + "grad_norm": 8.092464546572918, + "learning_rate": 5e-05, + "loss": 0.097, + "num_input_tokens_seen": 588997236, + "step": 6074 + }, + { + "epoch": 0.593859992178334, + "loss": 0.12212046980857849, + "loss_ce": 0.005024515558034182, + "loss_iou": 0.404296875, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 588997236, + "step": 6074 + }, + { + "epoch": 0.5939577630035198, + "grad_norm": 4.491829893727446, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 589094208, + "step": 6075 + }, + { + "epoch": 0.5939577630035198, + "loss": 0.0569097176194191, + "loss_ce": 0.004945915658026934, + "loss_iou": 0.3125, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 589094208, + "step": 6075 + }, + { + "epoch": 0.5940555338287055, + "grad_norm": 5.471824865785681, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 589190892, + "step": 6076 + }, + { + "epoch": 0.5940555338287055, + "loss": 0.07025671005249023, + "loss_ce": 0.002461904427036643, + "loss_iou": 0.328125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 589190892, + "step": 6076 + }, + { + "epoch": 0.5941533046538913, + "grad_norm": 12.662478416769211, + "learning_rate": 5e-05, + "loss": 0.0481, + "num_input_tokens_seen": 589286892, + "step": 6077 + }, + { + "epoch": 0.5941533046538913, + "loss": 0.04597051441669464, + "loss_ce": 0.0031543539371341467, + "loss_iou": 0.326171875, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 589286892, + "step": 6077 + }, + { + "epoch": 0.5942510754790771, + "grad_norm": 14.908924258016627, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 589382980, + "step": 6078 + }, + { + "epoch": 0.5942510754790771, + "loss": 0.06670764088630676, + "loss_ce": 0.004123717080801725, + "loss_iou": 0.259765625, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 589382980, + "step": 6078 + }, + { + "epoch": 0.5943488463042628, + "grad_norm": 5.028327965983458, + "learning_rate": 5e-05, + "loss": 0.1054, + "num_input_tokens_seen": 589479868, + "step": 6079 + }, + { + "epoch": 0.5943488463042628, + "loss": 0.10169363021850586, + "loss_ce": 0.0030608188826590776, + "loss_iou": 0.310546875, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 589479868, + "step": 6079 + }, + { + "epoch": 0.5944466171294486, + "grad_norm": 6.809291158171764, + "learning_rate": 5e-05, + "loss": 0.0545, + "num_input_tokens_seen": 589575744, + "step": 6080 + }, + { + "epoch": 0.5944466171294486, + "loss": 0.08506210148334503, + "loss_ce": 0.0008183237514458597, + "loss_iou": 0.212890625, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 589575744, + "step": 6080 + }, + { + "epoch": 0.5945443879546344, + "grad_norm": 1.935310828789636, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 589672188, + "step": 6081 + }, + { + "epoch": 0.5945443879546344, + "loss": 0.0481296107172966, + "loss_ce": 0.004718356300145388, + "loss_iou": 0.24609375, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 589672188, + "step": 6081 + }, + { + "epoch": 0.5946421587798201, + "grad_norm": 16.775130368699433, + "learning_rate": 5e-05, + "loss": 0.0977, + "num_input_tokens_seen": 589769200, + "step": 6082 + }, + { + "epoch": 0.5946421587798201, + "loss": 0.09925763309001923, + "loss_ce": 0.011229678988456726, + "loss_iou": 0.2041015625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 589769200, + "step": 6082 + }, + { + "epoch": 0.5947399296050059, + "grad_norm": 15.07349497802953, + "learning_rate": 5e-05, + "loss": 0.0633, + "num_input_tokens_seen": 589866276, + "step": 6083 + }, + { + "epoch": 0.5947399296050059, + "loss": 0.0725041851401329, + "loss_ce": 0.010385657660663128, + "loss_iou": 0.3125, + "loss_num": 0.012451171875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 589866276, + "step": 6083 + }, + { + "epoch": 0.5948377004301917, + "grad_norm": 3.118589401282414, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 589962452, + "step": 6084 + }, + { + "epoch": 0.5948377004301917, + "loss": 0.05374257266521454, + "loss_ce": 0.006707355845719576, + "loss_iou": 0.2470703125, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 589962452, + "step": 6084 + }, + { + "epoch": 0.5949354712553774, + "grad_norm": 4.2082646630515175, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 590059328, + "step": 6085 + }, + { + "epoch": 0.5949354712553774, + "loss": 0.08157580345869064, + "loss_ce": 0.00647204602137208, + "loss_iou": 0.28515625, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 590059328, + "step": 6085 + }, + { + "epoch": 0.5950332420805632, + "grad_norm": 3.352486980976612, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 590156732, + "step": 6086 + }, + { + "epoch": 0.5950332420805632, + "loss": 0.08058266341686249, + "loss_ce": 0.003647854086011648, + "loss_iou": 0.23828125, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 590156732, + "step": 6086 + }, + { + "epoch": 0.5951310129057489, + "grad_norm": 13.353240506966358, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 590254108, + "step": 6087 + }, + { + "epoch": 0.5951310129057489, + "loss": 0.05587387830018997, + "loss_ce": 0.0021019079722464085, + "loss_iou": 0.326171875, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 590254108, + "step": 6087 + }, + { + "epoch": 0.5952287837309347, + "grad_norm": 5.734871973215112, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 590351192, + "step": 6088 + }, + { + "epoch": 0.5952287837309347, + "loss": 0.056514520198106766, + "loss_ce": 0.0034139342606067657, + "loss_iou": 0.439453125, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 590351192, + "step": 6088 + }, + { + "epoch": 0.5953265545561205, + "grad_norm": 20.873286084753378, + "learning_rate": 5e-05, + "loss": 0.1165, + "num_input_tokens_seen": 590447764, + "step": 6089 + }, + { + "epoch": 0.5953265545561205, + "loss": 0.13057063519954681, + "loss_ce": 0.007798417471349239, + "loss_iou": 0.2451171875, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 590447764, + "step": 6089 + }, + { + "epoch": 0.5954243253813062, + "grad_norm": 5.289782389719434, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 590545268, + "step": 6090 + }, + { + "epoch": 0.5954243253813062, + "loss": 0.08731621503829956, + "loss_ce": 0.00816887803375721, + "loss_iou": 0.28125, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 590545268, + "step": 6090 + }, + { + "epoch": 0.595522096206492, + "grad_norm": 2.712854832972042, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 590642032, + "step": 6091 + }, + { + "epoch": 0.595522096206492, + "loss": 0.04879424721002579, + "loss_ce": 0.004559015855193138, + "loss_iou": 0.259765625, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 590642032, + "step": 6091 + }, + { + "epoch": 0.5956198670316778, + "grad_norm": 4.940405093112243, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 590739364, + "step": 6092 + }, + { + "epoch": 0.5956198670316778, + "loss": 0.06967590749263763, + "loss_ce": 0.003918162081390619, + "loss_iou": 0.279296875, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 590739364, + "step": 6092 + }, + { + "epoch": 0.5957176378568635, + "grad_norm": 8.542968499844418, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 590837244, + "step": 6093 + }, + { + "epoch": 0.5957176378568635, + "loss": 0.10586895048618317, + "loss_ce": 0.004153858404606581, + "loss_iou": 0.3515625, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 590837244, + "step": 6093 + }, + { + "epoch": 0.5958154086820493, + "grad_norm": 5.549097345483859, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 590934612, + "step": 6094 + }, + { + "epoch": 0.5958154086820493, + "loss": 0.060106389224529266, + "loss_ce": 0.009981267154216766, + "loss_iou": 0.4375, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 590934612, + "step": 6094 + }, + { + "epoch": 0.595913179507235, + "grad_norm": 7.053828445686979, + "learning_rate": 5e-05, + "loss": 0.0656, + "num_input_tokens_seen": 591031840, + "step": 6095 + }, + { + "epoch": 0.595913179507235, + "loss": 0.04370059072971344, + "loss_ce": 0.003985777031630278, + "loss_iou": 0.29296875, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 591031840, + "step": 6095 + }, + { + "epoch": 0.5960109503324208, + "grad_norm": 15.821219138793678, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 591128140, + "step": 6096 + }, + { + "epoch": 0.5960109503324208, + "loss": 0.07534629106521606, + "loss_ce": 0.005041417200118303, + "loss_iou": 0.37109375, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 591128140, + "step": 6096 + }, + { + "epoch": 0.5961087211576066, + "grad_norm": 15.73766877721473, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 591226012, + "step": 6097 + }, + { + "epoch": 0.5961087211576066, + "loss": 0.06369535624980927, + "loss_ce": 0.0028585665859282017, + "loss_iou": 0.419921875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 591226012, + "step": 6097 + }, + { + "epoch": 0.5962064919827923, + "grad_norm": 8.279566804895158, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 591323536, + "step": 6098 + }, + { + "epoch": 0.5962064919827923, + "loss": 0.11267949640750885, + "loss_ce": 0.00547124445438385, + "loss_iou": 0.298828125, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 591323536, + "step": 6098 + }, + { + "epoch": 0.5963042628079781, + "grad_norm": 11.362801944490677, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 591421784, + "step": 6099 + }, + { + "epoch": 0.5963042628079781, + "loss": 0.09831295162439346, + "loss_ce": 0.005173308774828911, + "loss_iou": 0.35546875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 591421784, + "step": 6099 + }, + { + "epoch": 0.5964020336331639, + "grad_norm": 4.905409060515362, + "learning_rate": 5e-05, + "loss": 0.119, + "num_input_tokens_seen": 591518312, + "step": 6100 + }, + { + "epoch": 0.5964020336331639, + "loss": 0.1296902596950531, + "loss_ce": 0.005193804856389761, + "loss_iou": 0.337890625, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 591518312, + "step": 6100 + }, + { + "epoch": 0.5964998044583496, + "grad_norm": 3.2952595114022105, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 591614932, + "step": 6101 + }, + { + "epoch": 0.5964998044583496, + "loss": 0.07887504249811172, + "loss_ce": 0.00735709723085165, + "loss_iou": 0.275390625, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 591614932, + "step": 6101 + }, + { + "epoch": 0.5965975752835354, + "grad_norm": 9.139456000351505, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 591712576, + "step": 6102 + }, + { + "epoch": 0.5965975752835354, + "loss": 0.1088753491640091, + "loss_ce": 0.00418479647487402, + "loss_iou": 0.322265625, + "loss_num": 0.02099609375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 591712576, + "step": 6102 + }, + { + "epoch": 0.5966953461087211, + "grad_norm": 9.073573274946583, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 591809468, + "step": 6103 + }, + { + "epoch": 0.5966953461087211, + "loss": 0.05803034454584122, + "loss_ce": 0.004929760936647654, + "loss_iou": 0.283203125, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 591809468, + "step": 6103 + }, + { + "epoch": 0.5967931169339069, + "grad_norm": 6.1438370977077765, + "learning_rate": 5e-05, + "loss": 0.0745, + "num_input_tokens_seen": 591906560, + "step": 6104 + }, + { + "epoch": 0.5967931169339069, + "loss": 0.060295186936855316, + "loss_ce": 0.0048294877633452415, + "loss_iou": 0.423828125, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 591906560, + "step": 6104 + }, + { + "epoch": 0.5968908877590927, + "grad_norm": 4.1565923866574845, + "learning_rate": 5e-05, + "loss": 0.0458, + "num_input_tokens_seen": 592003548, + "step": 6105 + }, + { + "epoch": 0.5968908877590927, + "loss": 0.05462738871574402, + "loss_ce": 0.0038156178779900074, + "loss_iou": 0.27734375, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 592003548, + "step": 6105 + }, + { + "epoch": 0.5969886585842784, + "grad_norm": 2.561804448827827, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 592101524, + "step": 6106 + }, + { + "epoch": 0.5969886585842784, + "loss": 0.07384395599365234, + "loss_ce": 0.009253503754734993, + "loss_iou": 0.3046875, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 592101524, + "step": 6106 + }, + { + "epoch": 0.5970864294094642, + "grad_norm": 7.538856108472281, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 592198700, + "step": 6107 + }, + { + "epoch": 0.5970864294094642, + "loss": 0.05411871522665024, + "loss_ce": 0.00531729543581605, + "loss_iou": 0.21875, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 592198700, + "step": 6107 + }, + { + "epoch": 0.59718420023465, + "grad_norm": 5.391023511406089, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 592295288, + "step": 6108 + }, + { + "epoch": 0.59718420023465, + "loss": 0.07145624607801437, + "loss_ce": 0.00405817711725831, + "loss_iou": 0.345703125, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 592295288, + "step": 6108 + }, + { + "epoch": 0.5972819710598357, + "grad_norm": 2.992886911358819, + "learning_rate": 5e-05, + "loss": 0.0587, + "num_input_tokens_seen": 592391928, + "step": 6109 + }, + { + "epoch": 0.5972819710598357, + "loss": 0.07146042585372925, + "loss_ce": 0.004428562708199024, + "loss_iou": 0.22265625, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 592391928, + "step": 6109 + }, + { + "epoch": 0.5973797418850215, + "grad_norm": 21.700335426724106, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 592488816, + "step": 6110 + }, + { + "epoch": 0.5973797418850215, + "loss": 0.07266457378864288, + "loss_ce": 0.008226708509027958, + "loss_iou": 0.30078125, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 592488816, + "step": 6110 + }, + { + "epoch": 0.5974775127102073, + "grad_norm": 37.61666049421471, + "learning_rate": 5e-05, + "loss": 0.0965, + "num_input_tokens_seen": 592585012, + "step": 6111 + }, + { + "epoch": 0.5974775127102073, + "loss": 0.09873946011066437, + "loss_ce": 0.005996535066515207, + "loss_iou": 0.345703125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 592585012, + "step": 6111 + }, + { + "epoch": 0.597575283535393, + "grad_norm": 14.599319214895464, + "learning_rate": 5e-05, + "loss": 0.0763, + "num_input_tokens_seen": 592682136, + "step": 6112 + }, + { + "epoch": 0.597575283535393, + "loss": 0.08583233505487442, + "loss_ce": 0.00654766708612442, + "loss_iou": 0.318359375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 592682136, + "step": 6112 + }, + { + "epoch": 0.5976730543605788, + "grad_norm": 23.204489047994056, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 592778996, + "step": 6113 + }, + { + "epoch": 0.5976730543605788, + "loss": 0.08312010020017624, + "loss_ce": 0.002874126425012946, + "loss_iou": 0.296875, + "loss_num": 0.01611328125, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 592778996, + "step": 6113 + }, + { + "epoch": 0.5977708251857645, + "grad_norm": 2.2544792041330366, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 592874484, + "step": 6114 + }, + { + "epoch": 0.5977708251857645, + "loss": 0.060843463987112045, + "loss_ce": 0.004202837124466896, + "loss_iou": 0.240234375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 592874484, + "step": 6114 + }, + { + "epoch": 0.5978685960109503, + "grad_norm": 14.389163539545278, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 592970808, + "step": 6115 + }, + { + "epoch": 0.5978685960109503, + "loss": 0.08058750629425049, + "loss_ce": 0.0054379659704864025, + "loss_iou": 0.1845703125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 592970808, + "step": 6115 + }, + { + "epoch": 0.5979663668361361, + "grad_norm": 3.6051784139925775, + "learning_rate": 5e-05, + "loss": 0.0879, + "num_input_tokens_seen": 593066644, + "step": 6116 + }, + { + "epoch": 0.5979663668361361, + "loss": 0.08695001900196075, + "loss_ce": 0.007779785431921482, + "loss_iou": 0.279296875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 593066644, + "step": 6116 + }, + { + "epoch": 0.5980641376613218, + "grad_norm": 2.9474654202206962, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 593163796, + "step": 6117 + }, + { + "epoch": 0.5980641376613218, + "loss": 0.0653735026717186, + "loss_ce": 0.0037203682586550713, + "loss_iou": 0.275390625, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 593163796, + "step": 6117 + }, + { + "epoch": 0.5981619084865076, + "grad_norm": 3.4552642164694447, + "learning_rate": 5e-05, + "loss": 0.0703, + "num_input_tokens_seen": 593260304, + "step": 6118 + }, + { + "epoch": 0.5981619084865076, + "loss": 0.05759841948747635, + "loss_ce": 0.0033000160474330187, + "loss_iou": 0.302734375, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 593260304, + "step": 6118 + }, + { + "epoch": 0.5982596793116934, + "grad_norm": 2.970419348505961, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 593357360, + "step": 6119 + }, + { + "epoch": 0.5982596793116934, + "loss": 0.07381469011306763, + "loss_ce": 0.016472158953547478, + "loss_iou": 0.2890625, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 593357360, + "step": 6119 + }, + { + "epoch": 0.5983574501368791, + "grad_norm": 8.705541174079908, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 593454036, + "step": 6120 + }, + { + "epoch": 0.5983574501368791, + "loss": 0.05389280617237091, + "loss_ce": 0.0019824057817459106, + "loss_iou": 0.24609375, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 593454036, + "step": 6120 + }, + { + "epoch": 0.5984552209620649, + "grad_norm": 12.318389187669304, + "learning_rate": 5e-05, + "loss": 0.0968, + "num_input_tokens_seen": 593550704, + "step": 6121 + }, + { + "epoch": 0.5984552209620649, + "loss": 0.06138532608747482, + "loss_ce": 0.0035087394062429667, + "loss_iou": 0.279296875, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 593550704, + "step": 6121 + }, + { + "epoch": 0.5985529917872506, + "grad_norm": 4.403240652290818, + "learning_rate": 5e-05, + "loss": 0.0587, + "num_input_tokens_seen": 593647532, + "step": 6122 + }, + { + "epoch": 0.5985529917872506, + "loss": 0.0355813205242157, + "loss_ce": 0.005002705845981836, + "loss_iou": 0.28125, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 593647532, + "step": 6122 + }, + { + "epoch": 0.5986507626124364, + "grad_norm": 6.111969396397834, + "learning_rate": 5e-05, + "loss": 0.0871, + "num_input_tokens_seen": 593743984, + "step": 6123 + }, + { + "epoch": 0.5986507626124364, + "loss": 0.09305937588214874, + "loss_ce": 0.007152390666306019, + "loss_iou": 0.2333984375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 593743984, + "step": 6123 + }, + { + "epoch": 0.5987485334376222, + "grad_norm": 28.125458124176525, + "learning_rate": 5e-05, + "loss": 0.1016, + "num_input_tokens_seen": 593841152, + "step": 6124 + }, + { + "epoch": 0.5987485334376222, + "loss": 0.12393549084663391, + "loss_ce": 0.0026738913729786873, + "loss_iou": 0.3046875, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 593841152, + "step": 6124 + }, + { + "epoch": 0.5988463042628079, + "grad_norm": 19.36725630989001, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 593938244, + "step": 6125 + }, + { + "epoch": 0.5988463042628079, + "loss": 0.08492790162563324, + "loss_ce": 0.007169118616729975, + "loss_iou": 0.416015625, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 593938244, + "step": 6125 + }, + { + "epoch": 0.5989440750879937, + "grad_norm": 2.9965132369349345, + "learning_rate": 5e-05, + "loss": 0.0658, + "num_input_tokens_seen": 594035052, + "step": 6126 + }, + { + "epoch": 0.5989440750879937, + "loss": 0.06626957654953003, + "loss_ce": 0.009079639799892902, + "loss_iou": 0.27734375, + "loss_num": 0.011474609375, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 594035052, + "step": 6126 + }, + { + "epoch": 0.5990418459131795, + "grad_norm": 5.461881164135886, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 594132080, + "step": 6127 + }, + { + "epoch": 0.5990418459131795, + "loss": 0.05905582755804062, + "loss_ce": 0.0018658817280083895, + "loss_iou": 0.369140625, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 594132080, + "step": 6127 + }, + { + "epoch": 0.5991396167383652, + "grad_norm": 6.585234137946175, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 594229572, + "step": 6128 + }, + { + "epoch": 0.5991396167383652, + "loss": 0.07239861786365509, + "loss_ce": 0.0025476962327957153, + "loss_iou": 0.359375, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 594229572, + "step": 6128 + }, + { + "epoch": 0.599237387563551, + "grad_norm": 9.945277670609597, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 594327784, + "step": 6129 + }, + { + "epoch": 0.599237387563551, + "loss": 0.0655999630689621, + "loss_ce": 0.010271597653627396, + "loss_iou": 0.306640625, + "loss_num": 0.01104736328125, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 594327784, + "step": 6129 + }, + { + "epoch": 0.5993351583887369, + "grad_norm": 9.93376743215581, + "learning_rate": 5e-05, + "loss": 0.1179, + "num_input_tokens_seen": 594424416, + "step": 6130 + }, + { + "epoch": 0.5993351583887369, + "loss": 0.129083514213562, + "loss_ce": 0.00884426198899746, + "loss_iou": 0.294921875, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 594424416, + "step": 6130 + }, + { + "epoch": 0.5994329292139225, + "grad_norm": 8.35297221892422, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 594523084, + "step": 6131 + }, + { + "epoch": 0.5994329292139225, + "loss": 0.07405191659927368, + "loss_ce": 0.004380290396511555, + "loss_iou": 0.3828125, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 594523084, + "step": 6131 + }, + { + "epoch": 0.5995307000391084, + "grad_norm": 2.761030190734339, + "learning_rate": 5e-05, + "loss": 0.0879, + "num_input_tokens_seen": 594620092, + "step": 6132 + }, + { + "epoch": 0.5995307000391084, + "loss": 0.0893387719988823, + "loss_ce": 0.0055451346561312675, + "loss_iou": 0.21484375, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 594620092, + "step": 6132 + }, + { + "epoch": 0.599628470864294, + "grad_norm": 7.650248959734284, + "learning_rate": 5e-05, + "loss": 0.0875, + "num_input_tokens_seen": 594716444, + "step": 6133 + }, + { + "epoch": 0.599628470864294, + "loss": 0.07196912169456482, + "loss_ce": 0.0032435364555567503, + "loss_iou": 0.201171875, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 594716444, + "step": 6133 + }, + { + "epoch": 0.5997262416894799, + "grad_norm": 4.678221944996361, + "learning_rate": 5e-05, + "loss": 0.1015, + "num_input_tokens_seen": 594813296, + "step": 6134 + }, + { + "epoch": 0.5997262416894799, + "loss": 0.12527740001678467, + "loss_ce": 0.007372725754976273, + "loss_iou": 0.318359375, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 594813296, + "step": 6134 + }, + { + "epoch": 0.5998240125146657, + "grad_norm": 8.284786466015879, + "learning_rate": 5e-05, + "loss": 0.122, + "num_input_tokens_seen": 594910632, + "step": 6135 + }, + { + "epoch": 0.5998240125146657, + "loss": 0.11515127122402191, + "loss_ce": 0.0038231497164815664, + "loss_iou": 0.451171875, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 594910632, + "step": 6135 + }, + { + "epoch": 0.5999217833398514, + "grad_norm": 21.154942231961478, + "learning_rate": 5e-05, + "loss": 0.0557, + "num_input_tokens_seen": 595007772, + "step": 6136 + }, + { + "epoch": 0.5999217833398514, + "loss": 0.05326968431472778, + "loss_ce": 0.0055249352008104324, + "loss_iou": 0.30859375, + "loss_num": 0.009521484375, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 595007772, + "step": 6136 + }, + { + "epoch": 0.6000195541650372, + "grad_norm": 6.7200038122068895, + "learning_rate": 5e-05, + "loss": 0.0929, + "num_input_tokens_seen": 595104344, + "step": 6137 + }, + { + "epoch": 0.6000195541650372, + "loss": 0.09610690176486969, + "loss_ce": 0.00640048086643219, + "loss_iou": 0.3828125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 595104344, + "step": 6137 + }, + { + "epoch": 0.600117324990223, + "grad_norm": 6.339155116494092, + "learning_rate": 5e-05, + "loss": 0.0505, + "num_input_tokens_seen": 595201556, + "step": 6138 + }, + { + "epoch": 0.600117324990223, + "loss": 0.05232642590999603, + "loss_ce": 0.005260692909359932, + "loss_iou": 0.220703125, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 595201556, + "step": 6138 + }, + { + "epoch": 0.6002150958154087, + "grad_norm": 6.669847912713828, + "learning_rate": 5e-05, + "loss": 0.1088, + "num_input_tokens_seen": 595298824, + "step": 6139 + }, + { + "epoch": 0.6002150958154087, + "loss": 0.08991322666406631, + "loss_ce": 0.006230215076357126, + "loss_iou": 0.26171875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 595298824, + "step": 6139 + }, + { + "epoch": 0.6003128666405945, + "grad_norm": 2.5083941155449434, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 595396536, + "step": 6140 + }, + { + "epoch": 0.6003128666405945, + "loss": 0.052882298827171326, + "loss_ce": 0.006487949751317501, + "loss_iou": 0.2578125, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 595396536, + "step": 6140 + }, + { + "epoch": 0.6004106374657802, + "grad_norm": 7.0548811165904795, + "learning_rate": 5e-05, + "loss": 0.1107, + "num_input_tokens_seen": 595493780, + "step": 6141 + }, + { + "epoch": 0.6004106374657802, + "loss": 0.08949659764766693, + "loss_ce": 0.0062751565128564835, + "loss_iou": 0.310546875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 595493780, + "step": 6141 + }, + { + "epoch": 0.600508408290966, + "grad_norm": 6.9110952319463905, + "learning_rate": 5e-05, + "loss": 0.0875, + "num_input_tokens_seen": 595590980, + "step": 6142 + }, + { + "epoch": 0.600508408290966, + "loss": 0.07116223871707916, + "loss_ce": 0.00371839152649045, + "loss_iou": 0.279296875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 595590980, + "step": 6142 + }, + { + "epoch": 0.6006061791161518, + "grad_norm": 20.131323888961205, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 595688208, + "step": 6143 + }, + { + "epoch": 0.6006061791161518, + "loss": 0.034667983651161194, + "loss_ce": 0.002143878024071455, + "loss_iou": 0.26953125, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 595688208, + "step": 6143 + }, + { + "epoch": 0.6007039499413375, + "grad_norm": 29.910811458238534, + "learning_rate": 5e-05, + "loss": 0.0614, + "num_input_tokens_seen": 595784872, + "step": 6144 + }, + { + "epoch": 0.6007039499413375, + "loss": 0.05452585592865944, + "loss_ce": 0.007421974092721939, + "loss_iou": 0.279296875, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 595784872, + "step": 6144 + }, + { + "epoch": 0.6008017207665233, + "grad_norm": 10.076946387620609, + "learning_rate": 5e-05, + "loss": 0.0643, + "num_input_tokens_seen": 595882256, + "step": 6145 + }, + { + "epoch": 0.6008017207665233, + "loss": 0.06105174496769905, + "loss_ce": 0.0027707992121577263, + "loss_iou": 0.314453125, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 595882256, + "step": 6145 + }, + { + "epoch": 0.6008994915917091, + "grad_norm": 3.837984440023297, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 595979632, + "step": 6146 + }, + { + "epoch": 0.6008994915917091, + "loss": 0.09269267320632935, + "loss_ce": 0.005320846103131771, + "loss_iou": 0.333984375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 595979632, + "step": 6146 + }, + { + "epoch": 0.6009972624168948, + "grad_norm": 7.3391206024461475, + "learning_rate": 5e-05, + "loss": 0.1162, + "num_input_tokens_seen": 596075996, + "step": 6147 + }, + { + "epoch": 0.6009972624168948, + "loss": 0.13877028226852417, + "loss_ce": 0.01117628812789917, + "loss_iou": 0.255859375, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 596075996, + "step": 6147 + }, + { + "epoch": 0.6010950332420806, + "grad_norm": 5.612135187844279, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 596172056, + "step": 6148 + }, + { + "epoch": 0.6010950332420806, + "loss": 0.08244965970516205, + "loss_ce": 0.0038974101189523935, + "loss_iou": 0.302734375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 596172056, + "step": 6148 + }, + { + "epoch": 0.6011928040672663, + "grad_norm": 2.736448526573842, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 596268704, + "step": 6149 + }, + { + "epoch": 0.6011928040672663, + "loss": 0.07266900688409805, + "loss_ce": 0.0021428843028843403, + "loss_iou": 0.2265625, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 596268704, + "step": 6149 + }, + { + "epoch": 0.6012905748924521, + "grad_norm": 4.664947256386979, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 596365284, + "step": 6150 + }, + { + "epoch": 0.6012905748924521, + "loss": 0.06518116593360901, + "loss_ce": 0.0016969757853075862, + "loss_iou": 0.26953125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 596365284, + "step": 6150 + }, + { + "epoch": 0.6013883457176379, + "grad_norm": 7.075096102652965, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 596461964, + "step": 6151 + }, + { + "epoch": 0.6013883457176379, + "loss": 0.12660904228687286, + "loss_ce": 0.009459687396883965, + "loss_iou": 0.32421875, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 596461964, + "step": 6151 + }, + { + "epoch": 0.6014861165428236, + "grad_norm": 4.801848782006466, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 596558688, + "step": 6152 + }, + { + "epoch": 0.6014861165428236, + "loss": 0.07705503702163696, + "loss_ce": 0.00701720267534256, + "loss_iou": 0.26171875, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 596558688, + "step": 6152 + }, + { + "epoch": 0.6015838873680094, + "grad_norm": 11.827776999408643, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 596654660, + "step": 6153 + }, + { + "epoch": 0.6015838873680094, + "loss": 0.08095230162143707, + "loss_ce": 0.0035749850794672966, + "loss_iou": 0.3515625, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 596654660, + "step": 6153 + }, + { + "epoch": 0.6016816581931952, + "grad_norm": 29.108383582770802, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 596751656, + "step": 6154 + }, + { + "epoch": 0.6016816581931952, + "loss": 0.06869318336248398, + "loss_ce": 0.007932684384286404, + "loss_iou": 0.298828125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 596751656, + "step": 6154 + }, + { + "epoch": 0.6017794290183809, + "grad_norm": 10.287762236001074, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 596849128, + "step": 6155 + }, + { + "epoch": 0.6017794290183809, + "loss": 0.04029126465320587, + "loss_ce": 0.0042042299173772335, + "loss_iou": 0.28125, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 596849128, + "step": 6155 + }, + { + "epoch": 0.6018771998435667, + "grad_norm": 10.857047255937848, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 596945304, + "step": 6156 + }, + { + "epoch": 0.6018771998435667, + "loss": 0.09432666003704071, + "loss_ce": 0.0066229477524757385, + "loss_iou": 0.240234375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 596945304, + "step": 6156 + }, + { + "epoch": 0.6019749706687525, + "grad_norm": 20.86463450409109, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 597041800, + "step": 6157 + }, + { + "epoch": 0.6019749706687525, + "loss": 0.06955278664827347, + "loss_ce": 0.004947074688971043, + "loss_iou": 0.263671875, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 597041800, + "step": 6157 + }, + { + "epoch": 0.6020727414939382, + "grad_norm": 11.621830981532868, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 597138848, + "step": 6158 + }, + { + "epoch": 0.6020727414939382, + "loss": 0.11180730909109116, + "loss_ce": 0.005102592520415783, + "loss_iou": 0.234375, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 597138848, + "step": 6158 + }, + { + "epoch": 0.602170512319124, + "grad_norm": 16.74036741690962, + "learning_rate": 5e-05, + "loss": 0.0916, + "num_input_tokens_seen": 597236544, + "step": 6159 + }, + { + "epoch": 0.602170512319124, + "loss": 0.087007537484169, + "loss_ce": 0.006929419469088316, + "loss_iou": 0.37109375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 597236544, + "step": 6159 + }, + { + "epoch": 0.6022682831443097, + "grad_norm": 13.425948333042538, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 597333728, + "step": 6160 + }, + { + "epoch": 0.6022682831443097, + "loss": 0.05913897231221199, + "loss_ce": 0.0048787156119942665, + "loss_iou": 0.31640625, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 597333728, + "step": 6160 + }, + { + "epoch": 0.6023660539694955, + "grad_norm": 12.962250933467502, + "learning_rate": 5e-05, + "loss": 0.0581, + "num_input_tokens_seen": 597430332, + "step": 6161 + }, + { + "epoch": 0.6023660539694955, + "loss": 0.05610189214348793, + "loss_ce": 0.00777730718255043, + "loss_iou": 0.271484375, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 597430332, + "step": 6161 + }, + { + "epoch": 0.6024638247946813, + "grad_norm": 16.024912526227716, + "learning_rate": 5e-05, + "loss": 0.1001, + "num_input_tokens_seen": 597528532, + "step": 6162 + }, + { + "epoch": 0.6024638247946813, + "loss": 0.10620345920324326, + "loss_ce": 0.00793685857206583, + "loss_iou": 0.28515625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 597528532, + "step": 6162 + }, + { + "epoch": 0.602561595619867, + "grad_norm": 11.005544682094655, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 597625828, + "step": 6163 + }, + { + "epoch": 0.602561595619867, + "loss": 0.07922928035259247, + "loss_ce": 0.002782742492854595, + "loss_iou": 0.400390625, + "loss_num": 0.0152587890625, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 597625828, + "step": 6163 + }, + { + "epoch": 0.6026593664450528, + "grad_norm": 4.50585426507783, + "learning_rate": 5e-05, + "loss": 0.0817, + "num_input_tokens_seen": 597722628, + "step": 6164 + }, + { + "epoch": 0.6026593664450528, + "loss": 0.09441350400447845, + "loss_ce": 0.008567560464143753, + "loss_iou": 0.34765625, + "loss_num": 0.01708984375, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 597722628, + "step": 6164 + }, + { + "epoch": 0.6027571372702386, + "grad_norm": 30.46045805393274, + "learning_rate": 5e-05, + "loss": 0.0938, + "num_input_tokens_seen": 597820760, + "step": 6165 + }, + { + "epoch": 0.6027571372702386, + "loss": 0.07117302715778351, + "loss_ce": 0.005300840828567743, + "loss_iou": 0.41015625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 597820760, + "step": 6165 + }, + { + "epoch": 0.6028549080954243, + "grad_norm": 61.57088132947777, + "learning_rate": 5e-05, + "loss": 0.1061, + "num_input_tokens_seen": 597918692, + "step": 6166 + }, + { + "epoch": 0.6028549080954243, + "loss": 0.10217253863811493, + "loss_ce": 0.0050350879319012165, + "loss_iou": 0.31640625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 597918692, + "step": 6166 + }, + { + "epoch": 0.6029526789206101, + "grad_norm": 31.439414872762427, + "learning_rate": 5e-05, + "loss": 0.0978, + "num_input_tokens_seen": 598015628, + "step": 6167 + }, + { + "epoch": 0.6029526789206101, + "loss": 0.09066583216190338, + "loss_ce": 0.004140877164900303, + "loss_iou": 0.322265625, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 598015628, + "step": 6167 + }, + { + "epoch": 0.6030504497457958, + "grad_norm": 3.653035907330679, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 598113372, + "step": 6168 + }, + { + "epoch": 0.6030504497457958, + "loss": 0.05341089516878128, + "loss_ce": 0.0021413620561361313, + "loss_iou": 0.302734375, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 598113372, + "step": 6168 + }, + { + "epoch": 0.6031482205709816, + "grad_norm": 3.8106787355153076, + "learning_rate": 5e-05, + "loss": 0.0582, + "num_input_tokens_seen": 598209984, + "step": 6169 + }, + { + "epoch": 0.6031482205709816, + "loss": 0.07673527300357819, + "loss_ce": 0.00421787379309535, + "loss_iou": 0.205078125, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 598209984, + "step": 6169 + }, + { + "epoch": 0.6032459913961674, + "grad_norm": 4.2941574722911575, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 598306872, + "step": 6170 + }, + { + "epoch": 0.6032459913961674, + "loss": 0.06077868491411209, + "loss_ce": 0.005747861694544554, + "loss_iou": 0.287109375, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 598306872, + "step": 6170 + }, + { + "epoch": 0.6033437622213531, + "grad_norm": 3.169134386655368, + "learning_rate": 5e-05, + "loss": 0.1186, + "num_input_tokens_seen": 598403764, + "step": 6171 + }, + { + "epoch": 0.6033437622213531, + "loss": 0.13908590376377106, + "loss_ce": 0.0053731356747448444, + "loss_iou": 0.2578125, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 598403764, + "step": 6171 + }, + { + "epoch": 0.6034415330465389, + "grad_norm": 4.148892239473186, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 598500300, + "step": 6172 + }, + { + "epoch": 0.6034415330465389, + "loss": 0.06459694355726242, + "loss_ce": 0.00609474815428257, + "loss_iou": 0.2412109375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 598500300, + "step": 6172 + }, + { + "epoch": 0.6035393038717247, + "grad_norm": 16.89145130552725, + "learning_rate": 5e-05, + "loss": 0.092, + "num_input_tokens_seen": 598597484, + "step": 6173 + }, + { + "epoch": 0.6035393038717247, + "loss": 0.11103927344083786, + "loss_ce": 0.005906220525503159, + "loss_iou": 0.3515625, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 598597484, + "step": 6173 + }, + { + "epoch": 0.6036370746969104, + "grad_norm": 18.257407625993018, + "learning_rate": 5e-05, + "loss": 0.0742, + "num_input_tokens_seen": 598692996, + "step": 6174 + }, + { + "epoch": 0.6036370746969104, + "loss": 0.06693565100431442, + "loss_ce": 0.003047102829441428, + "loss_iou": 0.2080078125, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 598692996, + "step": 6174 + }, + { + "epoch": 0.6037348455220962, + "grad_norm": 3.847206256206329, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 598790544, + "step": 6175 + }, + { + "epoch": 0.6037348455220962, + "loss": 0.07609731703996658, + "loss_ce": 0.0048082564026117325, + "loss_iou": 0.353515625, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 598790544, + "step": 6175 + }, + { + "epoch": 0.603832616347282, + "grad_norm": 4.5627453891566425, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 598887236, + "step": 6176 + }, + { + "epoch": 0.603832616347282, + "loss": 0.08416805416345596, + "loss_ce": 0.004562955349683762, + "loss_iou": 0.2119140625, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 598887236, + "step": 6176 + }, + { + "epoch": 0.6039303871724677, + "grad_norm": 5.523461934690972, + "learning_rate": 5e-05, + "loss": 0.0547, + "num_input_tokens_seen": 598984308, + "step": 6177 + }, + { + "epoch": 0.6039303871724677, + "loss": 0.05451950803399086, + "loss_ce": 0.00510010588914156, + "loss_iou": 0.220703125, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 598984308, + "step": 6177 + }, + { + "epoch": 0.6040281579976535, + "grad_norm": 20.726203939749055, + "learning_rate": 5e-05, + "loss": 0.0546, + "num_input_tokens_seen": 599082152, + "step": 6178 + }, + { + "epoch": 0.6040281579976535, + "loss": 0.05542078614234924, + "loss_ce": 0.004898934159427881, + "loss_iou": 0.310546875, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 599082152, + "step": 6178 + }, + { + "epoch": 0.6041259288228392, + "grad_norm": 3.647136193952006, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 599179040, + "step": 6179 + }, + { + "epoch": 0.6041259288228392, + "loss": 0.09166188538074493, + "loss_ce": 0.00840993132442236, + "loss_iou": 0.318359375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 599179040, + "step": 6179 + }, + { + "epoch": 0.604223699648025, + "grad_norm": 3.3294991974300925, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 599275104, + "step": 6180 + }, + { + "epoch": 0.604223699648025, + "loss": 0.06700321286916733, + "loss_ce": 0.0035266499035060406, + "loss_iou": 0.27734375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 599275104, + "step": 6180 + }, + { + "epoch": 0.6043214704732108, + "grad_norm": 10.642497984120174, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 599372292, + "step": 6181 + }, + { + "epoch": 0.6043214704732108, + "loss": 0.0702056735754013, + "loss_ce": 0.002136219758540392, + "loss_iou": 0.28125, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 599372292, + "step": 6181 + }, + { + "epoch": 0.6044192412983965, + "grad_norm": 8.674444319552794, + "learning_rate": 5e-05, + "loss": 0.1086, + "num_input_tokens_seen": 599469088, + "step": 6182 + }, + { + "epoch": 0.6044192412983965, + "loss": 0.10177655518054962, + "loss_ce": 0.009285408072173595, + "loss_iou": 0.2578125, + "loss_num": 0.0185546875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 599469088, + "step": 6182 + }, + { + "epoch": 0.6045170121235823, + "grad_norm": 5.016660430941252, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 599565368, + "step": 6183 + }, + { + "epoch": 0.6045170121235823, + "loss": 0.04810076951980591, + "loss_ce": 0.00607806583866477, + "loss_iou": 0.2060546875, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 599565368, + "step": 6183 + }, + { + "epoch": 0.6046147829487681, + "grad_norm": 6.3079100326472135, + "learning_rate": 5e-05, + "loss": 0.0819, + "num_input_tokens_seen": 599663300, + "step": 6184 + }, + { + "epoch": 0.6046147829487681, + "loss": 0.08518795669078827, + "loss_ce": 0.0028820491861552, + "loss_iou": 0.349609375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 599663300, + "step": 6184 + }, + { + "epoch": 0.6047125537739538, + "grad_norm": 6.054809468277507, + "learning_rate": 5e-05, + "loss": 0.0532, + "num_input_tokens_seen": 599760504, + "step": 6185 + }, + { + "epoch": 0.6047125537739538, + "loss": 0.0306114349514246, + "loss_ce": 0.0017799531342461705, + "loss_iou": 0.255859375, + "loss_num": 0.005767822265625, + "loss_xval": 0.02880859375, + "num_input_tokens_seen": 599760504, + "step": 6185 + }, + { + "epoch": 0.6048103245991396, + "grad_norm": 5.073619869001059, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 599857172, + "step": 6186 + }, + { + "epoch": 0.6048103245991396, + "loss": 0.08655547350645065, + "loss_ce": 0.006034838501363993, + "loss_iou": 0.296875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 599857172, + "step": 6186 + }, + { + "epoch": 0.6049080954243253, + "grad_norm": 9.782860478693898, + "learning_rate": 5e-05, + "loss": 0.1113, + "num_input_tokens_seen": 599953356, + "step": 6187 + }, + { + "epoch": 0.6049080954243253, + "loss": 0.09579246491193771, + "loss_ce": 0.004007035866379738, + "loss_iou": 0.2333984375, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 599953356, + "step": 6187 + }, + { + "epoch": 0.6050058662495111, + "grad_norm": 2.488221581522644, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 600051104, + "step": 6188 + }, + { + "epoch": 0.6050058662495111, + "loss": 0.038360580801963806, + "loss_ce": 0.0030517415143549442, + "loss_iou": 0.37109375, + "loss_num": 0.007049560546875, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 600051104, + "step": 6188 + }, + { + "epoch": 0.6051036370746969, + "grad_norm": 11.856069232086485, + "learning_rate": 5e-05, + "loss": 0.0912, + "num_input_tokens_seen": 600148400, + "step": 6189 + }, + { + "epoch": 0.6051036370746969, + "loss": 0.0854625403881073, + "loss_ce": 0.006483048666268587, + "loss_iou": 0.375, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 600148400, + "step": 6189 + }, + { + "epoch": 0.6052014078998826, + "grad_norm": 12.441757485839643, + "learning_rate": 5e-05, + "loss": 0.0675, + "num_input_tokens_seen": 600245604, + "step": 6190 + }, + { + "epoch": 0.6052014078998826, + "loss": 0.06077270582318306, + "loss_ce": 0.0046508777886629105, + "loss_iou": 0.298828125, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 600245604, + "step": 6190 + }, + { + "epoch": 0.6052991787250684, + "grad_norm": 13.170036579380916, + "learning_rate": 5e-05, + "loss": 0.0978, + "num_input_tokens_seen": 600342472, + "step": 6191 + }, + { + "epoch": 0.6052991787250684, + "loss": 0.11101450026035309, + "loss_ce": 0.0015937198186293244, + "loss_iou": 0.265625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 600342472, + "step": 6191 + }, + { + "epoch": 0.6053969495502542, + "grad_norm": 9.426735383206951, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 600440440, + "step": 6192 + }, + { + "epoch": 0.6053969495502542, + "loss": 0.07159893214702606, + "loss_ce": 0.0047043995000422, + "loss_iou": 0.3828125, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 600440440, + "step": 6192 + }, + { + "epoch": 0.6054947203754399, + "grad_norm": 10.78470075092725, + "learning_rate": 5e-05, + "loss": 0.0974, + "num_input_tokens_seen": 600536532, + "step": 6193 + }, + { + "epoch": 0.6054947203754399, + "loss": 0.10727725923061371, + "loss_ce": 0.0035174968652427197, + "loss_iou": 0.158203125, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 600536532, + "step": 6193 + }, + { + "epoch": 0.6055924912006257, + "grad_norm": 3.1337851358086684, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 600632796, + "step": 6194 + }, + { + "epoch": 0.6055924912006257, + "loss": 0.04126347601413727, + "loss_ce": 0.004024398513138294, + "loss_iou": 0.267578125, + "loss_num": 0.0074462890625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 600632796, + "step": 6194 + }, + { + "epoch": 0.6056902620258114, + "grad_norm": 2.280904797736403, + "learning_rate": 5e-05, + "loss": 0.0691, + "num_input_tokens_seen": 600729452, + "step": 6195 + }, + { + "epoch": 0.6056902620258114, + "loss": 0.048145994544029236, + "loss_ce": 0.00661920290440321, + "loss_iou": 0.271484375, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 600729452, + "step": 6195 + }, + { + "epoch": 0.6057880328509972, + "grad_norm": 6.4188660818163905, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 600825548, + "step": 6196 + }, + { + "epoch": 0.6057880328509972, + "loss": 0.08045095205307007, + "loss_ce": 0.0020665512420237064, + "loss_iou": 0.267578125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 600825548, + "step": 6196 + }, + { + "epoch": 0.605885803676183, + "grad_norm": 15.143666418086829, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 600922500, + "step": 6197 + }, + { + "epoch": 0.605885803676183, + "loss": 0.07551141083240509, + "loss_ce": 0.0028605014085769653, + "loss_iou": 0.28515625, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 600922500, + "step": 6197 + }, + { + "epoch": 0.6059835745013687, + "grad_norm": 14.52762209655558, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 601019488, + "step": 6198 + }, + { + "epoch": 0.6059835745013687, + "loss": 0.12913347780704498, + "loss_ce": 0.00822475180029869, + "loss_iou": 0.265625, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 601019488, + "step": 6198 + }, + { + "epoch": 0.6060813453265546, + "grad_norm": 12.111551718797763, + "learning_rate": 5e-05, + "loss": 0.1238, + "num_input_tokens_seen": 601116320, + "step": 6199 + }, + { + "epoch": 0.6060813453265546, + "loss": 0.14454621076583862, + "loss_ce": 0.005614924244582653, + "loss_iou": 0.26953125, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 601116320, + "step": 6199 + }, + { + "epoch": 0.6061791161517404, + "grad_norm": 18.803240372012198, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 601212724, + "step": 6200 + }, + { + "epoch": 0.6061791161517404, + "loss": 0.08391793817281723, + "loss_ce": 0.00828012265264988, + "loss_iou": 0.259765625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 601212724, + "step": 6200 + }, + { + "epoch": 0.606276886976926, + "grad_norm": 19.371870070584674, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 601310084, + "step": 6201 + }, + { + "epoch": 0.606276886976926, + "loss": 0.07067523896694183, + "loss_ce": 0.0013545601395890117, + "loss_iou": 0.373046875, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 601310084, + "step": 6201 + }, + { + "epoch": 0.6063746578021119, + "grad_norm": 4.680340453278904, + "learning_rate": 5e-05, + "loss": 0.0705, + "num_input_tokens_seen": 601406652, + "step": 6202 + }, + { + "epoch": 0.6063746578021119, + "loss": 0.09788503497838974, + "loss_ce": 0.0038603818975389004, + "loss_iou": 0.25390625, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 601406652, + "step": 6202 + }, + { + "epoch": 0.6064724286272977, + "grad_norm": 4.850887925158682, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 601503968, + "step": 6203 + }, + { + "epoch": 0.6064724286272977, + "loss": 0.06653311848640442, + "loss_ce": 0.007084876298904419, + "loss_iou": 0.291015625, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 601503968, + "step": 6203 + }, + { + "epoch": 0.6065701994524834, + "grad_norm": 5.252136638236821, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 601600968, + "step": 6204 + }, + { + "epoch": 0.6065701994524834, + "loss": 0.07951310276985168, + "loss_ce": 0.00282242801040411, + "loss_iou": 0.2294921875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 601600968, + "step": 6204 + }, + { + "epoch": 0.6066679702776692, + "grad_norm": 24.573182834159354, + "learning_rate": 5e-05, + "loss": 0.1047, + "num_input_tokens_seen": 601697880, + "step": 6205 + }, + { + "epoch": 0.6066679702776692, + "loss": 0.10791033506393433, + "loss_ce": 0.0048715462908148766, + "loss_iou": 0.326171875, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 601697880, + "step": 6205 + }, + { + "epoch": 0.6067657411028549, + "grad_norm": 11.95533424348079, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 601795092, + "step": 6206 + }, + { + "epoch": 0.6067657411028549, + "loss": 0.07291211187839508, + "loss_ce": 0.007192508317530155, + "loss_iou": 0.353515625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 601795092, + "step": 6206 + }, + { + "epoch": 0.6068635119280407, + "grad_norm": 10.085215829413492, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 601891460, + "step": 6207 + }, + { + "epoch": 0.6068635119280407, + "loss": 0.06293537467718124, + "loss_ce": 0.009079474955797195, + "loss_iou": 0.38671875, + "loss_num": 0.0107421875, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 601891460, + "step": 6207 + }, + { + "epoch": 0.6069612827532265, + "grad_norm": 18.449306804416086, + "learning_rate": 5e-05, + "loss": 0.0929, + "num_input_tokens_seen": 601988464, + "step": 6208 + }, + { + "epoch": 0.6069612827532265, + "loss": 0.11247480660676956, + "loss_ce": 0.005880722776055336, + "loss_iou": 0.279296875, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 601988464, + "step": 6208 + }, + { + "epoch": 0.6070590535784122, + "grad_norm": 16.54356341726408, + "learning_rate": 5e-05, + "loss": 0.0872, + "num_input_tokens_seen": 602086184, + "step": 6209 + }, + { + "epoch": 0.6070590535784122, + "loss": 0.059304557740688324, + "loss_ce": 0.004845933523029089, + "loss_iou": 0.255859375, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 602086184, + "step": 6209 + }, + { + "epoch": 0.607156824403598, + "grad_norm": 10.722314305494429, + "learning_rate": 5e-05, + "loss": 0.1026, + "num_input_tokens_seen": 602183172, + "step": 6210 + }, + { + "epoch": 0.607156824403598, + "loss": 0.1051039844751358, + "loss_ce": 0.005677718669176102, + "loss_iou": 0.380859375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 602183172, + "step": 6210 + }, + { + "epoch": 0.6072545952287838, + "grad_norm": 11.497216583980993, + "learning_rate": 5e-05, + "loss": 0.0925, + "num_input_tokens_seen": 602279728, + "step": 6211 + }, + { + "epoch": 0.6072545952287838, + "loss": 0.09785999357700348, + "loss_ce": 0.005605351645499468, + "loss_iou": 0.26953125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 602279728, + "step": 6211 + }, + { + "epoch": 0.6073523660539695, + "grad_norm": 4.1283619900115704, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 602376332, + "step": 6212 + }, + { + "epoch": 0.6073523660539695, + "loss": 0.0782507061958313, + "loss_ce": 0.0076406546868383884, + "loss_iou": 0.298828125, + "loss_num": 0.01409912109375, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 602376332, + "step": 6212 + }, + { + "epoch": 0.6074501368791553, + "grad_norm": 28.297933257796267, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 602473992, + "step": 6213 + }, + { + "epoch": 0.6074501368791553, + "loss": 0.08114755153656006, + "loss_ce": 0.007424709387123585, + "loss_iou": 0.265625, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 602473992, + "step": 6213 + }, + { + "epoch": 0.607547907704341, + "grad_norm": 8.695213904499306, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 602569596, + "step": 6214 + }, + { + "epoch": 0.607547907704341, + "loss": 0.09240813553333282, + "loss_ce": 0.005036306567490101, + "loss_iou": 0.35546875, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 602569596, + "step": 6214 + }, + { + "epoch": 0.6076456785295268, + "grad_norm": 7.075368280152231, + "learning_rate": 5e-05, + "loss": 0.1061, + "num_input_tokens_seen": 602665808, + "step": 6215 + }, + { + "epoch": 0.6076456785295268, + "loss": 0.08337744325399399, + "loss_ce": 0.006000121124088764, + "loss_iou": 0.296875, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 602665808, + "step": 6215 + }, + { + "epoch": 0.6077434493547126, + "grad_norm": 3.1984577283434077, + "learning_rate": 5e-05, + "loss": 0.0573, + "num_input_tokens_seen": 602762896, + "step": 6216 + }, + { + "epoch": 0.6077434493547126, + "loss": 0.04317793995141983, + "loss_ce": 0.005366661120206118, + "loss_iou": 0.2451171875, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 602762896, + "step": 6216 + }, + { + "epoch": 0.6078412201798983, + "grad_norm": 1.892859912659081, + "learning_rate": 5e-05, + "loss": 0.0643, + "num_input_tokens_seen": 602860012, + "step": 6217 + }, + { + "epoch": 0.6078412201798983, + "loss": 0.07837921380996704, + "loss_ce": 0.0038247681222856045, + "loss_iou": 0.32421875, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 602860012, + "step": 6217 + }, + { + "epoch": 0.6079389910050841, + "grad_norm": 5.911302190209124, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 602956076, + "step": 6218 + }, + { + "epoch": 0.6079389910050841, + "loss": 0.07590803503990173, + "loss_ce": 0.0034593017771840096, + "loss_iou": 0.2265625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 602956076, + "step": 6218 + }, + { + "epoch": 0.6080367618302699, + "grad_norm": 11.32282130475052, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 603054104, + "step": 6219 + }, + { + "epoch": 0.6080367618302699, + "loss": 0.04589226841926575, + "loss_ce": 0.0024581230245530605, + "loss_iou": 0.30859375, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 603054104, + "step": 6219 + }, + { + "epoch": 0.6081345326554556, + "grad_norm": 24.495497700110135, + "learning_rate": 5e-05, + "loss": 0.0866, + "num_input_tokens_seen": 603151532, + "step": 6220 + }, + { + "epoch": 0.6081345326554556, + "loss": 0.07298511266708374, + "loss_ce": 0.001345102209597826, + "loss_iou": 0.359375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 603151532, + "step": 6220 + }, + { + "epoch": 0.6082323034806414, + "grad_norm": 14.01859362087613, + "learning_rate": 5e-05, + "loss": 0.0486, + "num_input_tokens_seen": 603247912, + "step": 6221 + }, + { + "epoch": 0.6082323034806414, + "loss": 0.03684091567993164, + "loss_ce": 0.0028595926705747843, + "loss_iou": 0.26953125, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 603247912, + "step": 6221 + }, + { + "epoch": 0.6083300743058272, + "grad_norm": 7.736244355456968, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 603346032, + "step": 6222 + }, + { + "epoch": 0.6083300743058272, + "loss": 0.11869081854820251, + "loss_ce": 0.008201923221349716, + "loss_iou": 0.294921875, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 603346032, + "step": 6222 + }, + { + "epoch": 0.6084278451310129, + "grad_norm": 15.505166579471693, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 603442248, + "step": 6223 + }, + { + "epoch": 0.6084278451310129, + "loss": 0.05452042818069458, + "loss_ce": 0.002411664230749011, + "loss_iou": 0.44921875, + "loss_num": 0.01043701171875, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 603442248, + "step": 6223 + }, + { + "epoch": 0.6085256159561987, + "grad_norm": 7.369092622044837, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 603538236, + "step": 6224 + }, + { + "epoch": 0.6085256159561987, + "loss": 0.04525544494390488, + "loss_ce": 0.005414748564362526, + "loss_iou": 0.2451171875, + "loss_num": 0.00799560546875, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 603538236, + "step": 6224 + }, + { + "epoch": 0.6086233867813844, + "grad_norm": 5.052400653692757, + "learning_rate": 5e-05, + "loss": 0.0742, + "num_input_tokens_seen": 603635112, + "step": 6225 + }, + { + "epoch": 0.6086233867813844, + "loss": 0.09517024457454681, + "loss_ce": 0.005128129385411739, + "loss_iou": 0.3046875, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 603635112, + "step": 6225 + }, + { + "epoch": 0.6087211576065702, + "grad_norm": 3.566327097668955, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 603732512, + "step": 6226 + }, + { + "epoch": 0.6087211576065702, + "loss": 0.06097976863384247, + "loss_ce": 0.003415989689528942, + "loss_iou": 0.263671875, + "loss_num": 0.011474609375, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 603732512, + "step": 6226 + }, + { + "epoch": 0.608818928431756, + "grad_norm": 7.1847571903536895, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 603828952, + "step": 6227 + }, + { + "epoch": 0.608818928431756, + "loss": 0.0875345766544342, + "loss_ce": 0.005869533866643906, + "loss_iou": 0.25, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 603828952, + "step": 6227 + }, + { + "epoch": 0.6089166992569417, + "grad_norm": 20.973860914192517, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 603926064, + "step": 6228 + }, + { + "epoch": 0.6089166992569417, + "loss": 0.06887997686862946, + "loss_ce": 0.001432316959835589, + "loss_iou": 0.1943359375, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 603926064, + "step": 6228 + }, + { + "epoch": 0.6090144700821275, + "grad_norm": 3.234627016931304, + "learning_rate": 5e-05, + "loss": 0.0921, + "num_input_tokens_seen": 604024080, + "step": 6229 + }, + { + "epoch": 0.6090144700821275, + "loss": 0.11391007155179977, + "loss_ce": 0.00829636212438345, + "loss_iou": 0.294921875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 604024080, + "step": 6229 + }, + { + "epoch": 0.6091122409073133, + "grad_norm": 6.0562866202881755, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 604120876, + "step": 6230 + }, + { + "epoch": 0.6091122409073133, + "loss": 0.06743502616882324, + "loss_ce": 0.004980805795639753, + "loss_iou": 0.30078125, + "loss_num": 0.012451171875, + "loss_xval": 0.0625, + "num_input_tokens_seen": 604120876, + "step": 6230 + }, + { + "epoch": 0.609210011732499, + "grad_norm": 2.0945503062566573, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 604217880, + "step": 6231 + }, + { + "epoch": 0.609210011732499, + "loss": 0.04795766621828079, + "loss_ce": 0.0033180785831063986, + "loss_iou": 0.30859375, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 604217880, + "step": 6231 + }, + { + "epoch": 0.6093077825576848, + "grad_norm": 7.592987504775974, + "learning_rate": 5e-05, + "loss": 0.0935, + "num_input_tokens_seen": 604314768, + "step": 6232 + }, + { + "epoch": 0.6093077825576848, + "loss": 0.07344911247491837, + "loss_ce": 0.005326247774064541, + "loss_iou": 0.265625, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 604314768, + "step": 6232 + }, + { + "epoch": 0.6094055533828705, + "grad_norm": 16.799988139630646, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 604411232, + "step": 6233 + }, + { + "epoch": 0.6094055533828705, + "loss": 0.055785927921533585, + "loss_ce": 0.006347451359033585, + "loss_iou": 0.33203125, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 604411232, + "step": 6233 + }, + { + "epoch": 0.6095033242080563, + "grad_norm": 33.04917266736611, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 604508684, + "step": 6234 + }, + { + "epoch": 0.6095033242080563, + "loss": 0.08840058743953705, + "loss_ce": 0.00798676535487175, + "loss_iou": 0.3515625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 604508684, + "step": 6234 + }, + { + "epoch": 0.6096010950332421, + "grad_norm": 11.108320132081287, + "learning_rate": 5e-05, + "loss": 0.0566, + "num_input_tokens_seen": 604605764, + "step": 6235 + }, + { + "epoch": 0.6096010950332421, + "loss": 0.0624697208404541, + "loss_ce": 0.0025408235378563404, + "loss_iou": 0.279296875, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 604605764, + "step": 6235 + }, + { + "epoch": 0.6096988658584278, + "grad_norm": 7.583377788608875, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 604702512, + "step": 6236 + }, + { + "epoch": 0.6096988658584278, + "loss": 0.10411649942398071, + "loss_ce": 0.005682047456502914, + "loss_iou": 0.291015625, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 604702512, + "step": 6236 + }, + { + "epoch": 0.6097966366836136, + "grad_norm": 4.220269574233453, + "learning_rate": 5e-05, + "loss": 0.0612, + "num_input_tokens_seen": 604799180, + "step": 6237 + }, + { + "epoch": 0.6097966366836136, + "loss": 0.0733594000339508, + "loss_ce": 0.0012921406887471676, + "loss_iou": 0.35546875, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 604799180, + "step": 6237 + }, + { + "epoch": 0.6098944075087994, + "grad_norm": 10.234551311452659, + "learning_rate": 5e-05, + "loss": 0.0854, + "num_input_tokens_seen": 604896396, + "step": 6238 + }, + { + "epoch": 0.6098944075087994, + "loss": 0.08185727149248123, + "loss_ce": 0.002908294089138508, + "loss_iou": 0.2451171875, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 604896396, + "step": 6238 + }, + { + "epoch": 0.6099921783339851, + "grad_norm": 5.583838163195098, + "learning_rate": 5e-05, + "loss": 0.0865, + "num_input_tokens_seen": 604993252, + "step": 6239 + }, + { + "epoch": 0.6099921783339851, + "loss": 0.0700281485915184, + "loss_ce": 0.0055902814492583275, + "loss_iou": 0.3203125, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 604993252, + "step": 6239 + }, + { + "epoch": 0.6100899491591709, + "grad_norm": 3.028823495534272, + "learning_rate": 5e-05, + "loss": 0.0701, + "num_input_tokens_seen": 605090276, + "step": 6240 + }, + { + "epoch": 0.6100899491591709, + "loss": 0.08900424838066101, + "loss_ce": 0.00424358481541276, + "loss_iou": 0.171875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 605090276, + "step": 6240 + }, + { + "epoch": 0.6101877199843566, + "grad_norm": 5.297796565894676, + "learning_rate": 5e-05, + "loss": 0.0585, + "num_input_tokens_seen": 605186464, + "step": 6241 + }, + { + "epoch": 0.6101877199843566, + "loss": 0.07596096396446228, + "loss_ce": 0.004122588783502579, + "loss_iou": 0.3125, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 605186464, + "step": 6241 + }, + { + "epoch": 0.6102854908095424, + "grad_norm": 5.462585575653726, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 605283612, + "step": 6242 + }, + { + "epoch": 0.6102854908095424, + "loss": 0.09108318388462067, + "loss_ce": 0.005756031721830368, + "loss_iou": 0.314453125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 605283612, + "step": 6242 + }, + { + "epoch": 0.6103832616347282, + "grad_norm": 4.230317493865342, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 605380448, + "step": 6243 + }, + { + "epoch": 0.6103832616347282, + "loss": 0.05877264216542244, + "loss_ce": 0.007274231873452663, + "loss_iou": 0.2734375, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 605380448, + "step": 6243 + }, + { + "epoch": 0.6104810324599139, + "grad_norm": 7.142130110166331, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 605477704, + "step": 6244 + }, + { + "epoch": 0.6104810324599139, + "loss": 0.06188014894723892, + "loss_ce": 0.006734881084412336, + "loss_iou": 0.294921875, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 605477704, + "step": 6244 + }, + { + "epoch": 0.6105788032850997, + "grad_norm": 5.1795267555197295, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 605575048, + "step": 6245 + }, + { + "epoch": 0.6105788032850997, + "loss": 0.11901727318763733, + "loss_ce": 0.008131659589707851, + "loss_iou": 0.32421875, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 605575048, + "step": 6245 + }, + { + "epoch": 0.6106765741102855, + "grad_norm": 10.286608252057675, + "learning_rate": 5e-05, + "loss": 0.075, + "num_input_tokens_seen": 605672428, + "step": 6246 + }, + { + "epoch": 0.6106765741102855, + "loss": 0.08663523197174072, + "loss_ce": 0.005672096274793148, + "loss_iou": 0.39453125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 605672428, + "step": 6246 + }, + { + "epoch": 0.6107743449354712, + "grad_norm": 5.542591004133319, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 605768652, + "step": 6247 + }, + { + "epoch": 0.6107743449354712, + "loss": 0.0856255516409874, + "loss_ce": 0.006218814291059971, + "loss_iou": 0.35546875, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 605768652, + "step": 6247 + }, + { + "epoch": 0.610872115760657, + "grad_norm": 4.0856587939193885, + "learning_rate": 5e-05, + "loss": 0.0533, + "num_input_tokens_seen": 605865732, + "step": 6248 + }, + { + "epoch": 0.610872115760657, + "loss": 0.03851117566227913, + "loss_ce": 0.004316230304539204, + "loss_iou": 0.25390625, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 605865732, + "step": 6248 + }, + { + "epoch": 0.6109698865858428, + "grad_norm": 16.088710003481665, + "learning_rate": 5e-05, + "loss": 0.1041, + "num_input_tokens_seen": 605962132, + "step": 6249 + }, + { + "epoch": 0.6109698865858428, + "loss": 0.12011341005563736, + "loss_ce": 0.01185992918908596, + "loss_iou": 0.294921875, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 605962132, + "step": 6249 + }, + { + "epoch": 0.6110676574110285, + "grad_norm": 10.268290236918919, + "learning_rate": 5e-05, + "loss": 0.0507, + "num_input_tokens_seen": 606058980, + "step": 6250 + }, + { + "epoch": 0.6110676574110285, + "eval_seeclick_CIoU": 0.5483821928501129, + "eval_seeclick_GIoU": 0.5520865023136139, + "eval_seeclick_IoU": 0.5881727933883667, + "eval_seeclick_MAE_all": 0.06920021958649158, + "eval_seeclick_MAE_h": 0.0400595972314477, + "eval_seeclick_MAE_w": 0.08929934725165367, + "eval_seeclick_MAE_x": 0.10412197187542915, + "eval_seeclick_MAE_y": 0.043319981545209885, + "eval_seeclick_NUM_probability": 0.9999975264072418, + "eval_seeclick_inside_bbox": 0.8295454680919647, + "eval_seeclick_loss": 0.2816452980041504, + "eval_seeclick_loss_ce": 0.00969824381172657, + "eval_seeclick_loss_iou": 0.36968994140625, + "eval_seeclick_loss_num": 0.055866241455078125, + "eval_seeclick_loss_xval": 0.2793426513671875, + "eval_seeclick_runtime": 77.7926, + "eval_seeclick_samples_per_second": 0.553, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 606058980, + "step": 6250 + }, + { + "epoch": 0.6110676574110285, + "eval_icons_CIoU": 0.7432528734207153, + "eval_icons_GIoU": 0.7453126311302185, + "eval_icons_IoU": 0.7644881904125214, + "eval_icons_MAE_all": 0.04564499855041504, + "eval_icons_MAE_h": 0.0423356662504375, + "eval_icons_MAE_w": 0.0500092227011919, + "eval_icons_MAE_x": 0.052800893783569336, + "eval_icons_MAE_y": 0.03743421263061464, + "eval_icons_NUM_probability": 0.9999950230121613, + "eval_icons_inside_bbox": 0.8506944477558136, + "eval_icons_loss": 0.14936670660972595, + "eval_icons_loss_ce": 2.5238507532776566e-06, + "eval_icons_loss_iou": 0.391357421875, + "eval_icons_loss_num": 0.03434562683105469, + "eval_icons_loss_xval": 0.17168426513671875, + "eval_icons_runtime": 86.3944, + "eval_icons_samples_per_second": 0.579, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 606058980, + "step": 6250 + }, + { + "epoch": 0.6110676574110285, + "eval_screenspot_CIoU": 0.30682280162970227, + "eval_screenspot_GIoU": 0.2840046286582947, + "eval_screenspot_IoU": 0.40105263392130536, + "eval_screenspot_MAE_all": 0.16351390381654105, + "eval_screenspot_MAE_h": 0.11885547389586766, + "eval_screenspot_MAE_w": 0.21810749173164368, + "eval_screenspot_MAE_x": 0.20234478016694388, + "eval_screenspot_MAE_y": 0.11474792162577312, + "eval_screenspot_NUM_probability": 0.9999949733416239, + "eval_screenspot_inside_bbox": 0.609166661898295, + "eval_screenspot_loss": 0.5842077136039734, + "eval_screenspot_loss_ce": 0.023131600270668667, + "eval_screenspot_loss_iou": 0.3364664713541667, + "eval_screenspot_loss_num": 0.114288330078125, + "eval_screenspot_loss_xval": 0.5710856119791666, + "eval_screenspot_runtime": 150.5621, + "eval_screenspot_samples_per_second": 0.591, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 606058980, + "step": 6250 + }, + { + "epoch": 0.6110676574110285, + "eval_compot_CIoU": 0.5069077759981155, + "eval_compot_GIoU": 0.4875871539115906, + "eval_compot_IoU": 0.5614463090896606, + "eval_compot_MAE_all": 0.08713650330901146, + "eval_compot_MAE_h": 0.07275000959634781, + "eval_compot_MAE_w": 0.10439065843820572, + "eval_compot_MAE_x": 0.09554559364914894, + "eval_compot_MAE_y": 0.07585974782705307, + "eval_compot_NUM_probability": 0.9999907612800598, + "eval_compot_inside_bbox": 0.7673611044883728, + "eval_compot_loss": 0.2650796175003052, + "eval_compot_loss_ce": 0.013193624094128609, + "eval_compot_loss_iou": 0.4451904296875, + "eval_compot_loss_num": 0.045406341552734375, + "eval_compot_loss_xval": 0.2269134521484375, + "eval_compot_runtime": 87.2218, + "eval_compot_samples_per_second": 0.573, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 606058980, + "step": 6250 + }, + { + "epoch": 0.6110676574110285, + "loss": 0.24597245454788208, + "loss_ce": 0.01385575719177723, + "loss_iou": 0.419921875, + "loss_num": 0.04638671875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 606058980, + "step": 6250 + }, + { + "epoch": 0.6111654282362143, + "grad_norm": 14.035216981675028, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 606155800, + "step": 6251 + }, + { + "epoch": 0.6111654282362143, + "loss": 0.04773882403969765, + "loss_ce": 0.0020845290273427963, + "loss_iou": 0.4140625, + "loss_num": 0.00909423828125, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 606155800, + "step": 6251 + }, + { + "epoch": 0.6112631990614, + "grad_norm": 6.158019769727738, + "learning_rate": 5e-05, + "loss": 0.0432, + "num_input_tokens_seen": 606251632, + "step": 6252 + }, + { + "epoch": 0.6112631990614, + "loss": 0.05813252925872803, + "loss_ce": 0.007755633909255266, + "loss_iou": 0.251953125, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 606251632, + "step": 6252 + }, + { + "epoch": 0.6113609698865858, + "grad_norm": 5.1564562293122504, + "learning_rate": 5e-05, + "loss": 0.0633, + "num_input_tokens_seen": 606349112, + "step": 6253 + }, + { + "epoch": 0.6113609698865858, + "loss": 0.05999720096588135, + "loss_ce": 0.002898809965699911, + "loss_iou": 0.29296875, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 606349112, + "step": 6253 + }, + { + "epoch": 0.6114587407117716, + "grad_norm": 12.375230284138672, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 606445312, + "step": 6254 + }, + { + "epoch": 0.6114587407117716, + "loss": 0.07084760069847107, + "loss_ce": 0.0013209308963268995, + "loss_iou": 0.224609375, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 606445312, + "step": 6254 + }, + { + "epoch": 0.6115565115369573, + "grad_norm": 10.983603348281706, + "learning_rate": 5e-05, + "loss": 0.0856, + "num_input_tokens_seen": 606542200, + "step": 6255 + }, + { + "epoch": 0.6115565115369573, + "loss": 0.08469906449317932, + "loss_ce": 0.004743006080389023, + "loss_iou": 0.326171875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 606542200, + "step": 6255 + }, + { + "epoch": 0.6116542823621431, + "grad_norm": 8.288001319365609, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 606639904, + "step": 6256 + }, + { + "epoch": 0.6116542823621431, + "loss": 0.08753044903278351, + "loss_ce": 0.0073760333471000195, + "loss_iou": 0.2734375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 606639904, + "step": 6256 + }, + { + "epoch": 0.611752053187329, + "grad_norm": 7.982799049038213, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 606737200, + "step": 6257 + }, + { + "epoch": 0.611752053187329, + "loss": 0.050424665212631226, + "loss_ce": 0.0021744645200669765, + "loss_iou": 0.34765625, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 606737200, + "step": 6257 + }, + { + "epoch": 0.6118498240125146, + "grad_norm": 9.442384118381227, + "learning_rate": 5e-05, + "loss": 0.0601, + "num_input_tokens_seen": 606834028, + "step": 6258 + }, + { + "epoch": 0.6118498240125146, + "loss": 0.08656767010688782, + "loss_ce": 0.00755765475332737, + "loss_iou": 0.251953125, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 606834028, + "step": 6258 + }, + { + "epoch": 0.6119475948377004, + "grad_norm": 18.170028865436425, + "learning_rate": 5e-05, + "loss": 0.0477, + "num_input_tokens_seen": 606930116, + "step": 6259 + }, + { + "epoch": 0.6119475948377004, + "loss": 0.07434550672769547, + "loss_ce": 0.006199751514941454, + "loss_iou": 0.30078125, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 606930116, + "step": 6259 + }, + { + "epoch": 0.6120453656628861, + "grad_norm": 6.125786360358094, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 607027136, + "step": 6260 + }, + { + "epoch": 0.6120453656628861, + "loss": 0.06542283296585083, + "loss_ce": 0.003067789366468787, + "loss_iou": 0.255859375, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 607027136, + "step": 6260 + }, + { + "epoch": 0.612143136488072, + "grad_norm": 3.867443119953148, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 607123820, + "step": 6261 + }, + { + "epoch": 0.612143136488072, + "loss": 0.039492473006248474, + "loss_ce": 0.004626142792403698, + "loss_iou": 0.271484375, + "loss_num": 0.0069580078125, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 607123820, + "step": 6261 + }, + { + "epoch": 0.6122409073132578, + "grad_norm": 3.1277381522640963, + "learning_rate": 5e-05, + "loss": 0.0589, + "num_input_tokens_seen": 607219868, + "step": 6262 + }, + { + "epoch": 0.6122409073132578, + "loss": 0.051009658724069595, + "loss_ce": 0.00663710106164217, + "loss_iou": 0.353515625, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 607219868, + "step": 6262 + }, + { + "epoch": 0.6123386781384434, + "grad_norm": 4.528671709452437, + "learning_rate": 5e-05, + "loss": 0.0958, + "num_input_tokens_seen": 607316556, + "step": 6263 + }, + { + "epoch": 0.6123386781384434, + "loss": 0.0880412608385086, + "loss_ce": 0.009824702516198158, + "loss_iou": 0.2470703125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 607316556, + "step": 6263 + }, + { + "epoch": 0.6124364489636293, + "grad_norm": 4.686232408493394, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 607412808, + "step": 6264 + }, + { + "epoch": 0.6124364489636293, + "loss": 0.05446415767073631, + "loss_ce": 0.006162458099424839, + "loss_iou": 0.267578125, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 607412808, + "step": 6264 + }, + { + "epoch": 0.6125342197888151, + "grad_norm": 7.437657711196716, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 607509780, + "step": 6265 + }, + { + "epoch": 0.6125342197888151, + "loss": 0.060144491493701935, + "loss_ce": 0.005914752371609211, + "loss_iou": 0.328125, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 607509780, + "step": 6265 + }, + { + "epoch": 0.6126319906140008, + "grad_norm": 11.443693786941338, + "learning_rate": 5e-05, + "loss": 0.0604, + "num_input_tokens_seen": 607607936, + "step": 6266 + }, + { + "epoch": 0.6126319906140008, + "loss": 0.05562177300453186, + "loss_ce": 0.00793805904686451, + "loss_iou": 0.375, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 607607936, + "step": 6266 + }, + { + "epoch": 0.6127297614391866, + "grad_norm": 15.324152895561536, + "learning_rate": 5e-05, + "loss": 0.0998, + "num_input_tokens_seen": 607705420, + "step": 6267 + }, + { + "epoch": 0.6127297614391866, + "loss": 0.07955894619226456, + "loss_ce": 0.0018917105626314878, + "loss_iou": 0.451171875, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 607705420, + "step": 6267 + }, + { + "epoch": 0.6128275322643724, + "grad_norm": 22.24070379079832, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 607801684, + "step": 6268 + }, + { + "epoch": 0.6128275322643724, + "loss": 0.056788381189107895, + "loss_ce": 0.005488335154950619, + "loss_iou": 0.333984375, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 607801684, + "step": 6268 + }, + { + "epoch": 0.6129253030895581, + "grad_norm": 4.810624593245433, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 607898872, + "step": 6269 + }, + { + "epoch": 0.6129253030895581, + "loss": 0.04631676524877548, + "loss_ce": 0.004706048406660557, + "loss_iou": 0.298828125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 607898872, + "step": 6269 + }, + { + "epoch": 0.6130230739147439, + "grad_norm": 4.992453284630153, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 607996140, + "step": 6270 + }, + { + "epoch": 0.6130230739147439, + "loss": 0.059834323823451996, + "loss_ce": 0.0022629129234701395, + "loss_iou": 0.29296875, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 607996140, + "step": 6270 + }, + { + "epoch": 0.6131208447399296, + "grad_norm": 8.197254356497455, + "learning_rate": 5e-05, + "loss": 0.0979, + "num_input_tokens_seen": 608093104, + "step": 6271 + }, + { + "epoch": 0.6131208447399296, + "loss": 0.10906052589416504, + "loss_ce": 0.006674052216112614, + "loss_iou": 0.298828125, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 608093104, + "step": 6271 + }, + { + "epoch": 0.6132186155651154, + "grad_norm": 13.538048404557808, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 608189712, + "step": 6272 + }, + { + "epoch": 0.6132186155651154, + "loss": 0.07503858953714371, + "loss_ce": 0.0016438128659501672, + "loss_iou": 0.2197265625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 608189712, + "step": 6272 + }, + { + "epoch": 0.6133163863903012, + "grad_norm": 9.935436337988753, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 608286092, + "step": 6273 + }, + { + "epoch": 0.6133163863903012, + "loss": 0.07851991802453995, + "loss_ce": 0.004377466626465321, + "loss_iou": 0.359375, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 608286092, + "step": 6273 + }, + { + "epoch": 0.6134141572154869, + "grad_norm": 6.669369842736608, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 608382064, + "step": 6274 + }, + { + "epoch": 0.6134141572154869, + "loss": 0.08680475503206253, + "loss_ce": 0.005757695529609919, + "loss_iou": 0.26953125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 608382064, + "step": 6274 + }, + { + "epoch": 0.6135119280406727, + "grad_norm": 2.8928140380476557, + "learning_rate": 5e-05, + "loss": 0.0626, + "num_input_tokens_seen": 608478504, + "step": 6275 + }, + { + "epoch": 0.6135119280406727, + "loss": 0.07036246359348297, + "loss_ce": 0.004490269348025322, + "loss_iou": 0.2451171875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 608478504, + "step": 6275 + }, + { + "epoch": 0.6136096988658585, + "grad_norm": 3.415540454834913, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 608575392, + "step": 6276 + }, + { + "epoch": 0.6136096988658585, + "loss": 0.05396859347820282, + "loss_ce": 0.006872342899441719, + "loss_iou": 0.240234375, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 608575392, + "step": 6276 + }, + { + "epoch": 0.6137074696910442, + "grad_norm": 4.284822350157253, + "learning_rate": 5e-05, + "loss": 0.0695, + "num_input_tokens_seen": 608673228, + "step": 6277 + }, + { + "epoch": 0.6137074696910442, + "loss": 0.04487098008394241, + "loss_ce": 0.0034433684777468443, + "loss_iou": 0.294921875, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 608673228, + "step": 6277 + }, + { + "epoch": 0.61380524051623, + "grad_norm": 35.74968458153283, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 608769720, + "step": 6278 + }, + { + "epoch": 0.61380524051623, + "loss": 0.05646294355392456, + "loss_ce": 0.003976522944867611, + "loss_iou": 0.2578125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 608769720, + "step": 6278 + }, + { + "epoch": 0.6139030113414157, + "grad_norm": 38.113450025718805, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 608866464, + "step": 6279 + }, + { + "epoch": 0.6139030113414157, + "loss": 0.06492838263511658, + "loss_ce": 0.001756991259753704, + "loss_iou": 0.271484375, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 608866464, + "step": 6279 + }, + { + "epoch": 0.6140007821666015, + "grad_norm": 10.009925250460196, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 608961896, + "step": 6280 + }, + { + "epoch": 0.6140007821666015, + "loss": 0.055111974477767944, + "loss_ce": 0.003720370586961508, + "loss_iou": 0.1689453125, + "loss_num": 0.01031494140625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 608961896, + "step": 6280 + }, + { + "epoch": 0.6140985529917873, + "grad_norm": 8.99960746007749, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 609058612, + "step": 6281 + }, + { + "epoch": 0.6140985529917873, + "loss": 0.05553354695439339, + "loss_ce": 0.004035134334117174, + "loss_iou": 0.2265625, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 609058612, + "step": 6281 + }, + { + "epoch": 0.614196323816973, + "grad_norm": 3.046328719923857, + "learning_rate": 5e-05, + "loss": 0.0621, + "num_input_tokens_seen": 609155604, + "step": 6282 + }, + { + "epoch": 0.614196323816973, + "loss": 0.07712563127279282, + "loss_ce": 0.003997881896793842, + "loss_iou": 0.296875, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 609155604, + "step": 6282 + }, + { + "epoch": 0.6142940946421588, + "grad_norm": 5.755915916198353, + "learning_rate": 5e-05, + "loss": 0.1123, + "num_input_tokens_seen": 609252036, + "step": 6283 + }, + { + "epoch": 0.6142940946421588, + "loss": 0.1341446489095688, + "loss_ce": 0.0049179596826434135, + "loss_iou": 0.3203125, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 609252036, + "step": 6283 + }, + { + "epoch": 0.6143918654673446, + "grad_norm": 9.55389385057737, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 609348204, + "step": 6284 + }, + { + "epoch": 0.6143918654673446, + "loss": 0.06683626025915146, + "loss_ce": 0.004488847684115171, + "loss_iou": 0.283203125, + "loss_num": 0.01251220703125, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 609348204, + "step": 6284 + }, + { + "epoch": 0.6144896362925303, + "grad_norm": 5.9543182843272415, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 609445428, + "step": 6285 + }, + { + "epoch": 0.6144896362925303, + "loss": 0.06816430389881134, + "loss_ce": 0.0035128179006278515, + "loss_iou": 0.326171875, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 609445428, + "step": 6285 + }, + { + "epoch": 0.6145874071177161, + "grad_norm": 5.636861873775779, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 609543172, + "step": 6286 + }, + { + "epoch": 0.6145874071177161, + "loss": 0.10384680330753326, + "loss_ce": 0.00210118992254138, + "loss_iou": 0.28125, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 609543172, + "step": 6286 + }, + { + "epoch": 0.6146851779429018, + "grad_norm": 7.140809125524692, + "learning_rate": 5e-05, + "loss": 0.0662, + "num_input_tokens_seen": 609639796, + "step": 6287 + }, + { + "epoch": 0.6146851779429018, + "loss": 0.07586662471294403, + "loss_ce": 0.0066908979788422585, + "loss_iou": 0.267578125, + "loss_num": 0.0137939453125, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 609639796, + "step": 6287 + }, + { + "epoch": 0.6147829487680876, + "grad_norm": 3.077072967303221, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 609737244, + "step": 6288 + }, + { + "epoch": 0.6147829487680876, + "loss": 0.06731507927179337, + "loss_ce": 0.0017022877000272274, + "loss_iou": 0.44921875, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 609737244, + "step": 6288 + }, + { + "epoch": 0.6148807195932734, + "grad_norm": 10.423160532960155, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 609833708, + "step": 6289 + }, + { + "epoch": 0.6148807195932734, + "loss": 0.07172121107578278, + "loss_ce": 0.007214683573693037, + "loss_iou": 0.279296875, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 609833708, + "step": 6289 + }, + { + "epoch": 0.6149784904184591, + "grad_norm": 6.874207928142209, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 609928768, + "step": 6290 + }, + { + "epoch": 0.6149784904184591, + "loss": 0.050482362508773804, + "loss_ce": 0.003363220486789942, + "loss_iou": 0.283203125, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 609928768, + "step": 6290 + }, + { + "epoch": 0.6150762612436449, + "grad_norm": 13.699052937682119, + "learning_rate": 5e-05, + "loss": 0.1033, + "num_input_tokens_seen": 610024720, + "step": 6291 + }, + { + "epoch": 0.6150762612436449, + "loss": 0.1305713802576065, + "loss_ce": 0.006685275584459305, + "loss_iou": 0.21875, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 610024720, + "step": 6291 + }, + { + "epoch": 0.6151740320688307, + "grad_norm": 16.02424442662578, + "learning_rate": 5e-05, + "loss": 0.0511, + "num_input_tokens_seen": 610121956, + "step": 6292 + }, + { + "epoch": 0.6151740320688307, + "loss": 0.03619953989982605, + "loss_ce": 0.004569021984934807, + "loss_iou": 0.2451171875, + "loss_num": 0.006317138671875, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 610121956, + "step": 6292 + }, + { + "epoch": 0.6152718028940164, + "grad_norm": 5.237277484499656, + "learning_rate": 5e-05, + "loss": 0.0714, + "num_input_tokens_seen": 610218376, + "step": 6293 + }, + { + "epoch": 0.6152718028940164, + "loss": 0.08544860780239105, + "loss_ce": 0.0013440633192658424, + "loss_iou": 0.234375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 610218376, + "step": 6293 + }, + { + "epoch": 0.6153695737192022, + "grad_norm": 15.508426413139748, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 610314748, + "step": 6294 + }, + { + "epoch": 0.6153695737192022, + "loss": 0.056127287447452545, + "loss_ce": 0.00827572587877512, + "loss_iou": 0.28515625, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 610314748, + "step": 6294 + }, + { + "epoch": 0.615467344544388, + "grad_norm": 6.743913554918923, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 610411828, + "step": 6295 + }, + { + "epoch": 0.615467344544388, + "loss": 0.06872419267892838, + "loss_ce": 0.007185491733253002, + "loss_iou": 0.36328125, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 610411828, + "step": 6295 + }, + { + "epoch": 0.6155651153695737, + "grad_norm": 19.718498063567406, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 610509624, + "step": 6296 + }, + { + "epoch": 0.6155651153695737, + "loss": 0.08915610611438751, + "loss_ce": 0.00105185410939157, + "loss_iou": 0.310546875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 610509624, + "step": 6296 + }, + { + "epoch": 0.6156628861947595, + "grad_norm": 8.928292920312416, + "learning_rate": 5e-05, + "loss": 0.0594, + "num_input_tokens_seen": 610607004, + "step": 6297 + }, + { + "epoch": 0.6156628861947595, + "loss": 0.06781723350286484, + "loss_ce": 0.003684544935822487, + "loss_iou": 0.318359375, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 610607004, + "step": 6297 + }, + { + "epoch": 0.6157606570199452, + "grad_norm": 16.60879401664933, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 610704268, + "step": 6298 + }, + { + "epoch": 0.6157606570199452, + "loss": 0.08302800357341766, + "loss_ce": 0.004826708696782589, + "loss_iou": 0.2734375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 610704268, + "step": 6298 + }, + { + "epoch": 0.615858427845131, + "grad_norm": 8.956410735489609, + "learning_rate": 5e-05, + "loss": 0.1054, + "num_input_tokens_seen": 610801436, + "step": 6299 + }, + { + "epoch": 0.615858427845131, + "loss": 0.08790583908557892, + "loss_ce": 0.004234272055327892, + "loss_iou": 0.48046875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 610801436, + "step": 6299 + }, + { + "epoch": 0.6159561986703168, + "grad_norm": 7.722891938987442, + "learning_rate": 5e-05, + "loss": 0.0843, + "num_input_tokens_seen": 610898380, + "step": 6300 + }, + { + "epoch": 0.6159561986703168, + "loss": 0.06887423247098923, + "loss_ce": 0.003612392581999302, + "loss_iou": 0.298828125, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 610898380, + "step": 6300 + }, + { + "epoch": 0.6160539694955025, + "grad_norm": 6.631104684508612, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 610995448, + "step": 6301 + }, + { + "epoch": 0.6160539694955025, + "loss": 0.07027247548103333, + "loss_ce": 0.003896741895005107, + "loss_iou": 0.3828125, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 610995448, + "step": 6301 + }, + { + "epoch": 0.6161517403206883, + "grad_norm": 5.1803972128214655, + "learning_rate": 5e-05, + "loss": 0.1181, + "num_input_tokens_seen": 611092888, + "step": 6302 + }, + { + "epoch": 0.6161517403206883, + "loss": 0.11120447516441345, + "loss_ce": 0.005644173361361027, + "loss_iou": 0.255859375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 611092888, + "step": 6302 + }, + { + "epoch": 0.6162495111458741, + "grad_norm": 3.0654376855302146, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 611189660, + "step": 6303 + }, + { + "epoch": 0.6162495111458741, + "loss": 0.0601690299808979, + "loss_ce": 0.004230310674756765, + "loss_iou": 0.322265625, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 611189660, + "step": 6303 + }, + { + "epoch": 0.6163472819710598, + "grad_norm": 5.369042720930412, + "learning_rate": 5e-05, + "loss": 0.0632, + "num_input_tokens_seen": 611286512, + "step": 6304 + }, + { + "epoch": 0.6163472819710598, + "loss": 0.06887645274400711, + "loss_ce": 0.0064031570218503475, + "loss_iou": 0.19921875, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 611286512, + "step": 6304 + }, + { + "epoch": 0.6164450527962456, + "grad_norm": 21.650146395495735, + "learning_rate": 5e-05, + "loss": 0.0906, + "num_input_tokens_seen": 611383520, + "step": 6305 + }, + { + "epoch": 0.6164450527962456, + "loss": 0.09779645502567291, + "loss_ce": 0.005587596911936998, + "loss_iou": 0.32421875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 611383520, + "step": 6305 + }, + { + "epoch": 0.6165428236214313, + "grad_norm": 31.727665086626544, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 611480052, + "step": 6306 + }, + { + "epoch": 0.6165428236214313, + "loss": 0.05683060362935066, + "loss_ce": 0.0017044139094650745, + "loss_iou": 0.27734375, + "loss_num": 0.010986328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 611480052, + "step": 6306 + }, + { + "epoch": 0.6166405944466171, + "grad_norm": 14.169114120506446, + "learning_rate": 5e-05, + "loss": 0.0437, + "num_input_tokens_seen": 611575956, + "step": 6307 + }, + { + "epoch": 0.6166405944466171, + "loss": 0.050028279423713684, + "loss_ce": 0.010061697103083134, + "loss_iou": 0.30078125, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 611575956, + "step": 6307 + }, + { + "epoch": 0.6167383652718029, + "grad_norm": 25.299267441587716, + "learning_rate": 5e-05, + "loss": 0.099, + "num_input_tokens_seen": 611672508, + "step": 6308 + }, + { + "epoch": 0.6167383652718029, + "loss": 0.09141528606414795, + "loss_ce": 0.008865239098668098, + "loss_iou": 0.271484375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 611672508, + "step": 6308 + }, + { + "epoch": 0.6168361360969886, + "grad_norm": 6.109728029736093, + "learning_rate": 5e-05, + "loss": 0.0745, + "num_input_tokens_seen": 611768920, + "step": 6309 + }, + { + "epoch": 0.6168361360969886, + "loss": 0.09698022156953812, + "loss_ce": 0.009364252910017967, + "loss_iou": 0.26171875, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 611768920, + "step": 6309 + }, + { + "epoch": 0.6169339069221744, + "grad_norm": 27.543874434793246, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 611865580, + "step": 6310 + }, + { + "epoch": 0.6169339069221744, + "loss": 0.07408425956964493, + "loss_ce": 0.006747221574187279, + "loss_iou": 0.279296875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 611865580, + "step": 6310 + }, + { + "epoch": 0.6170316777473602, + "grad_norm": 45.50986407068111, + "learning_rate": 5e-05, + "loss": 0.0743, + "num_input_tokens_seen": 611961924, + "step": 6311 + }, + { + "epoch": 0.6170316777473602, + "loss": 0.09171253442764282, + "loss_ce": 0.006232797168195248, + "loss_iou": 0.25, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 611961924, + "step": 6311 + }, + { + "epoch": 0.6171294485725459, + "grad_norm": 16.38775126125163, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 612058812, + "step": 6312 + }, + { + "epoch": 0.6171294485725459, + "loss": 0.11774124205112457, + "loss_ce": 0.004231106489896774, + "loss_iou": 0.296875, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 612058812, + "step": 6312 + }, + { + "epoch": 0.6172272193977317, + "grad_norm": 7.94154116620425, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 612155364, + "step": 6313 + }, + { + "epoch": 0.6172272193977317, + "loss": 0.05704759433865547, + "loss_ce": 0.0028788927011191845, + "loss_iou": 0.30859375, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 612155364, + "step": 6313 + }, + { + "epoch": 0.6173249902229175, + "grad_norm": 4.6396980614005665, + "learning_rate": 5e-05, + "loss": 0.1047, + "num_input_tokens_seen": 612252228, + "step": 6314 + }, + { + "epoch": 0.6173249902229175, + "loss": 0.10028692334890366, + "loss_ce": 0.004461724776774645, + "loss_iou": 0.318359375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 612252228, + "step": 6314 + }, + { + "epoch": 0.6174227610481032, + "grad_norm": 3.3121782023900668, + "learning_rate": 5e-05, + "loss": 0.1101, + "num_input_tokens_seen": 612349152, + "step": 6315 + }, + { + "epoch": 0.6174227610481032, + "loss": 0.12005911767482758, + "loss_ce": 0.0049468157812952995, + "loss_iou": 0.3125, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 612349152, + "step": 6315 + }, + { + "epoch": 0.617520531873289, + "grad_norm": 4.527468872952294, + "learning_rate": 5e-05, + "loss": 0.0587, + "num_input_tokens_seen": 612446152, + "step": 6316 + }, + { + "epoch": 0.617520531873289, + "loss": 0.039263732731342316, + "loss_ce": 0.004982003476470709, + "loss_iou": 0.3046875, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 612446152, + "step": 6316 + }, + { + "epoch": 0.6176183026984747, + "grad_norm": 12.575835544010067, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 612542600, + "step": 6317 + }, + { + "epoch": 0.6176183026984747, + "loss": 0.07796168327331543, + "loss_ce": 0.007908589206635952, + "loss_iou": 0.271484375, + "loss_num": 0.0140380859375, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 612542600, + "step": 6317 + }, + { + "epoch": 0.6177160735236605, + "grad_norm": 16.914868932708934, + "learning_rate": 5e-05, + "loss": 0.0875, + "num_input_tokens_seen": 612639496, + "step": 6318 + }, + { + "epoch": 0.6177160735236605, + "loss": 0.061380911618471146, + "loss_ce": 0.003893421031534672, + "loss_iou": 0.306640625, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 612639496, + "step": 6318 + }, + { + "epoch": 0.6178138443488463, + "grad_norm": 7.296633602152569, + "learning_rate": 5e-05, + "loss": 0.0931, + "num_input_tokens_seen": 612736168, + "step": 6319 + }, + { + "epoch": 0.6178138443488463, + "loss": 0.06521201878786087, + "loss_ce": 0.006206282414495945, + "loss_iou": 0.326171875, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 612736168, + "step": 6319 + }, + { + "epoch": 0.617911615174032, + "grad_norm": 4.103915228838432, + "learning_rate": 5e-05, + "loss": 0.0478, + "num_input_tokens_seen": 612832584, + "step": 6320 + }, + { + "epoch": 0.617911615174032, + "loss": 0.050317272543907166, + "loss_ce": 0.0015997730661183596, + "loss_iou": 0.294921875, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 612832584, + "step": 6320 + }, + { + "epoch": 0.6180093859992178, + "grad_norm": 14.36998800915069, + "learning_rate": 5e-05, + "loss": 0.1172, + "num_input_tokens_seen": 612929428, + "step": 6321 + }, + { + "epoch": 0.6180093859992178, + "loss": 0.13315734267234802, + "loss_ce": 0.010552969761192799, + "loss_iou": 0.3359375, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 612929428, + "step": 6321 + }, + { + "epoch": 0.6181071568244036, + "grad_norm": 12.913718241693132, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 613026796, + "step": 6322 + }, + { + "epoch": 0.6181071568244036, + "loss": 0.06495091319084167, + "loss_ce": 0.010492300614714622, + "loss_iou": 0.25, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 613026796, + "step": 6322 + }, + { + "epoch": 0.6182049276495893, + "grad_norm": 6.307834938407205, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 613124364, + "step": 6323 + }, + { + "epoch": 0.6182049276495893, + "loss": 0.08760683238506317, + "loss_ce": 0.0030159158632159233, + "loss_iou": 0.248046875, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 613124364, + "step": 6323 + }, + { + "epoch": 0.6183026984747751, + "grad_norm": 2.3246706027995887, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 613221244, + "step": 6324 + }, + { + "epoch": 0.6183026984747751, + "loss": 0.06630104780197144, + "loss_ce": 0.003175433725118637, + "loss_iou": 0.24609375, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 613221244, + "step": 6324 + }, + { + "epoch": 0.6184004692999608, + "grad_norm": 21.140077870618313, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 613318172, + "step": 6325 + }, + { + "epoch": 0.6184004692999608, + "loss": 0.04977750778198242, + "loss_ce": 0.004184247925877571, + "loss_iou": 0.357421875, + "loss_num": 0.00909423828125, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 613318172, + "step": 6325 + }, + { + "epoch": 0.6184982401251466, + "grad_norm": 3.6767420612276847, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 613414324, + "step": 6326 + }, + { + "epoch": 0.6184982401251466, + "loss": 0.09406642615795135, + "loss_ce": 0.0028036076109856367, + "loss_iou": 0.2890625, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 613414324, + "step": 6326 + }, + { + "epoch": 0.6185960109503325, + "grad_norm": 17.564996328838458, + "learning_rate": 5e-05, + "loss": 0.0651, + "num_input_tokens_seen": 613511540, + "step": 6327 + }, + { + "epoch": 0.6185960109503325, + "loss": 0.03951152414083481, + "loss_ce": 0.005598865449428558, + "loss_iou": 0.427734375, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 613511540, + "step": 6327 + }, + { + "epoch": 0.6186937817755181, + "grad_norm": 3.239496133866798, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 613609028, + "step": 6328 + }, + { + "epoch": 0.6186937817755181, + "loss": 0.12207116931676865, + "loss_ce": 0.006729987449944019, + "loss_iou": 0.248046875, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 613609028, + "step": 6328 + }, + { + "epoch": 0.618791552600704, + "grad_norm": 30.856185898883222, + "learning_rate": 5e-05, + "loss": 0.0889, + "num_input_tokens_seen": 613706240, + "step": 6329 + }, + { + "epoch": 0.618791552600704, + "loss": 0.07964907586574554, + "loss_ce": 0.004957304336130619, + "loss_iou": 0.42578125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 613706240, + "step": 6329 + }, + { + "epoch": 0.6188893234258898, + "grad_norm": 14.02493447807261, + "learning_rate": 5e-05, + "loss": 0.0685, + "num_input_tokens_seen": 613803524, + "step": 6330 + }, + { + "epoch": 0.6188893234258898, + "loss": 0.06966620683670044, + "loss_ce": 0.0009558807360008359, + "loss_iou": 0.38671875, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 613803524, + "step": 6330 + }, + { + "epoch": 0.6189870942510755, + "grad_norm": 9.611294305217749, + "learning_rate": 5e-05, + "loss": 0.0967, + "num_input_tokens_seen": 613900444, + "step": 6331 + }, + { + "epoch": 0.6189870942510755, + "loss": 0.05624876916408539, + "loss_ce": 0.008008107542991638, + "loss_iou": 0.306640625, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 613900444, + "step": 6331 + }, + { + "epoch": 0.6190848650762613, + "grad_norm": 3.6834133176684283, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 613998184, + "step": 6332 + }, + { + "epoch": 0.6190848650762613, + "loss": 0.05795678496360779, + "loss_ce": 0.006404966115951538, + "loss_iou": 0.24609375, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 613998184, + "step": 6332 + }, + { + "epoch": 0.619182635901447, + "grad_norm": 12.585686677397488, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 614096472, + "step": 6333 + }, + { + "epoch": 0.619182635901447, + "loss": 0.0827522724866867, + "loss_ce": 0.008335152640938759, + "loss_iou": 0.2578125, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 614096472, + "step": 6333 + }, + { + "epoch": 0.6192804067266328, + "grad_norm": 7.551219540682164, + "learning_rate": 5e-05, + "loss": 0.0867, + "num_input_tokens_seen": 614193592, + "step": 6334 + }, + { + "epoch": 0.6192804067266328, + "loss": 0.0926571935415268, + "loss_ce": 0.004400355275720358, + "loss_iou": 0.43359375, + "loss_num": 0.017578125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 614193592, + "step": 6334 + }, + { + "epoch": 0.6193781775518186, + "grad_norm": 5.419868183803029, + "learning_rate": 5e-05, + "loss": 0.0564, + "num_input_tokens_seen": 614290960, + "step": 6335 + }, + { + "epoch": 0.6193781775518186, + "loss": 0.0546041764318943, + "loss_ce": 0.004219657741487026, + "loss_iou": 0.275390625, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 614290960, + "step": 6335 + }, + { + "epoch": 0.6194759483770043, + "grad_norm": 2.7653325825978246, + "learning_rate": 5e-05, + "loss": 0.0847, + "num_input_tokens_seen": 614387924, + "step": 6336 + }, + { + "epoch": 0.6194759483770043, + "loss": 0.09625313431024551, + "loss_ce": 0.004387596622109413, + "loss_iou": 0.2333984375, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 614387924, + "step": 6336 + }, + { + "epoch": 0.6195737192021901, + "grad_norm": 7.170169773804552, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 614484484, + "step": 6337 + }, + { + "epoch": 0.6195737192021901, + "loss": 0.07309494912624359, + "loss_ce": 0.00806199386715889, + "loss_iou": 0.1884765625, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 614484484, + "step": 6337 + }, + { + "epoch": 0.6196714900273759, + "grad_norm": 7.501362205641109, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 614581336, + "step": 6338 + }, + { + "epoch": 0.6196714900273759, + "loss": 0.0689626932144165, + "loss_ce": 0.001640919130295515, + "loss_iou": 0.275390625, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 614581336, + "step": 6338 + }, + { + "epoch": 0.6197692608525616, + "grad_norm": 12.573604480963198, + "learning_rate": 5e-05, + "loss": 0.101, + "num_input_tokens_seen": 614678464, + "step": 6339 + }, + { + "epoch": 0.6197692608525616, + "loss": 0.1176600456237793, + "loss_ce": 0.007323751226067543, + "loss_iou": 0.43359375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 614678464, + "step": 6339 + }, + { + "epoch": 0.6198670316777474, + "grad_norm": 7.300366687353459, + "learning_rate": 5e-05, + "loss": 0.1018, + "num_input_tokens_seen": 614775556, + "step": 6340 + }, + { + "epoch": 0.6198670316777474, + "loss": 0.1205591931939125, + "loss_ce": 0.010009266436100006, + "loss_iou": 0.279296875, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 614775556, + "step": 6340 + }, + { + "epoch": 0.6199648025029332, + "grad_norm": 23.046292951841632, + "learning_rate": 5e-05, + "loss": 0.1046, + "num_input_tokens_seen": 614872864, + "step": 6341 + }, + { + "epoch": 0.6199648025029332, + "loss": 0.05785442516207695, + "loss_ce": 0.004692803602665663, + "loss_iou": 0.28125, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 614872864, + "step": 6341 + }, + { + "epoch": 0.6200625733281189, + "grad_norm": 17.028871195721095, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 614969544, + "step": 6342 + }, + { + "epoch": 0.6200625733281189, + "loss": 0.07286239415407181, + "loss_ce": 0.002580407075583935, + "loss_iou": 0.294921875, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 614969544, + "step": 6342 + }, + { + "epoch": 0.6201603441533047, + "grad_norm": 6.204018309217001, + "learning_rate": 5e-05, + "loss": 0.0572, + "num_input_tokens_seen": 615066648, + "step": 6343 + }, + { + "epoch": 0.6201603441533047, + "loss": 0.05457727611064911, + "loss_ce": 0.0043071964755654335, + "loss_iou": 0.353515625, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 615066648, + "step": 6343 + }, + { + "epoch": 0.6202581149784904, + "grad_norm": 12.47660820840797, + "learning_rate": 5e-05, + "loss": 0.1297, + "num_input_tokens_seen": 615163248, + "step": 6344 + }, + { + "epoch": 0.6202581149784904, + "loss": 0.13209441304206848, + "loss_ce": 0.004576698411256075, + "loss_iou": 0.333984375, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 615163248, + "step": 6344 + }, + { + "epoch": 0.6203558858036762, + "grad_norm": 12.461153759716852, + "learning_rate": 5e-05, + "loss": 0.0827, + "num_input_tokens_seen": 615260636, + "step": 6345 + }, + { + "epoch": 0.6203558858036762, + "loss": 0.031745828688144684, + "loss_ce": 0.005508342757821083, + "loss_iou": 0.25390625, + "loss_num": 0.0052490234375, + "loss_xval": 0.0262451171875, + "num_input_tokens_seen": 615260636, + "step": 6345 + }, + { + "epoch": 0.620453656628862, + "grad_norm": 12.690909610276343, + "learning_rate": 5e-05, + "loss": 0.1151, + "num_input_tokens_seen": 615357820, + "step": 6346 + }, + { + "epoch": 0.620453656628862, + "loss": 0.1557329148054123, + "loss_ce": 0.0028093354776501656, + "loss_iou": 0.3359375, + "loss_num": 0.030517578125, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 615357820, + "step": 6346 + }, + { + "epoch": 0.6205514274540477, + "grad_norm": 17.523595614373313, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 615455140, + "step": 6347 + }, + { + "epoch": 0.6205514274540477, + "loss": 0.08333524316549301, + "loss_ce": 0.005851115100085735, + "loss_iou": 0.337890625, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 615455140, + "step": 6347 + }, + { + "epoch": 0.6206491982792335, + "grad_norm": 6.95133526082118, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 615552500, + "step": 6348 + }, + { + "epoch": 0.6206491982792335, + "loss": 0.10219138860702515, + "loss_ce": 0.004321523010730743, + "loss_iou": 0.33203125, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 615552500, + "step": 6348 + }, + { + "epoch": 0.6207469691044193, + "grad_norm": 3.801590378636726, + "learning_rate": 5e-05, + "loss": 0.0846, + "num_input_tokens_seen": 615650236, + "step": 6349 + }, + { + "epoch": 0.6207469691044193, + "loss": 0.07837840914726257, + "loss_ce": 0.0036408565938472748, + "loss_iou": 0.4296875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 615650236, + "step": 6349 + }, + { + "epoch": 0.620844739929605, + "grad_norm": 3.356070258437182, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 615746916, + "step": 6350 + }, + { + "epoch": 0.620844739929605, + "loss": 0.04390301555395126, + "loss_ce": 0.003718994790688157, + "loss_iou": 0.330078125, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 615746916, + "step": 6350 + }, + { + "epoch": 0.6209425107547908, + "grad_norm": 8.66873295822039, + "learning_rate": 5e-05, + "loss": 0.0717, + "num_input_tokens_seen": 615843704, + "step": 6351 + }, + { + "epoch": 0.6209425107547908, + "loss": 0.10248884558677673, + "loss_ce": 0.003672922495752573, + "loss_iou": 0.296875, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 615843704, + "step": 6351 + }, + { + "epoch": 0.6210402815799765, + "grad_norm": 3.4634966028692746, + "learning_rate": 5e-05, + "loss": 0.0912, + "num_input_tokens_seen": 615940300, + "step": 6352 + }, + { + "epoch": 0.6210402815799765, + "loss": 0.081060491502285, + "loss_ce": 0.014242255128920078, + "loss_iou": 0.28515625, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 615940300, + "step": 6352 + }, + { + "epoch": 0.6211380524051623, + "grad_norm": 11.318206342244196, + "learning_rate": 5e-05, + "loss": 0.1017, + "num_input_tokens_seen": 616037540, + "step": 6353 + }, + { + "epoch": 0.6211380524051623, + "loss": 0.08043035864830017, + "loss_ce": 0.0016797506250441074, + "loss_iou": 0.3046875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 616037540, + "step": 6353 + }, + { + "epoch": 0.6212358232303481, + "grad_norm": 27.768528992375746, + "learning_rate": 5e-05, + "loss": 0.0997, + "num_input_tokens_seen": 616135324, + "step": 6354 + }, + { + "epoch": 0.6212358232303481, + "loss": 0.10172514617443085, + "loss_ce": 0.004007864277809858, + "loss_iou": 0.330078125, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 616135324, + "step": 6354 + }, + { + "epoch": 0.6213335940555338, + "grad_norm": 28.321174932613744, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 616232136, + "step": 6355 + }, + { + "epoch": 0.6213335940555338, + "loss": 0.08674664795398712, + "loss_ce": 0.009399846196174622, + "loss_iou": 0.30078125, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 616232136, + "step": 6355 + }, + { + "epoch": 0.6214313648807196, + "grad_norm": 6.593110565254418, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 616329524, + "step": 6356 + }, + { + "epoch": 0.6214313648807196, + "loss": 0.08335472643375397, + "loss_ce": 0.0050161066465079784, + "loss_iou": 0.37890625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 616329524, + "step": 6356 + }, + { + "epoch": 0.6215291357059054, + "grad_norm": 4.675699438811005, + "learning_rate": 5e-05, + "loss": 0.0998, + "num_input_tokens_seen": 616427048, + "step": 6357 + }, + { + "epoch": 0.6215291357059054, + "loss": 0.09854045510292053, + "loss_ce": 0.006484186742454767, + "loss_iou": 0.267578125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 616427048, + "step": 6357 + }, + { + "epoch": 0.6216269065310911, + "grad_norm": 13.759761826174348, + "learning_rate": 5e-05, + "loss": 0.1067, + "num_input_tokens_seen": 616524128, + "step": 6358 + }, + { + "epoch": 0.6216269065310911, + "loss": 0.12134234607219696, + "loss_ce": 0.002285648137331009, + "loss_iou": 0.263671875, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 616524128, + "step": 6358 + }, + { + "epoch": 0.6217246773562769, + "grad_norm": 4.134609242493207, + "learning_rate": 5e-05, + "loss": 0.0582, + "num_input_tokens_seen": 616621016, + "step": 6359 + }, + { + "epoch": 0.6217246773562769, + "loss": 0.04830051213502884, + "loss_ce": 0.002485999371856451, + "loss_iou": 0.3125, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 616621016, + "step": 6359 + }, + { + "epoch": 0.6218224481814627, + "grad_norm": 11.676920982516494, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 616717616, + "step": 6360 + }, + { + "epoch": 0.6218224481814627, + "loss": 0.053857963532209396, + "loss_ce": 0.004930657800287008, + "loss_iou": 0.287109375, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 616717616, + "step": 6360 + }, + { + "epoch": 0.6219202190066484, + "grad_norm": 26.750048853014764, + "learning_rate": 5e-05, + "loss": 0.0845, + "num_input_tokens_seen": 616815468, + "step": 6361 + }, + { + "epoch": 0.6219202190066484, + "loss": 0.06893070042133331, + "loss_ce": 0.0042029176838696, + "loss_iou": 0.330078125, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 616815468, + "step": 6361 + }, + { + "epoch": 0.6220179898318342, + "grad_norm": 8.335520095122858, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 616912560, + "step": 6362 + }, + { + "epoch": 0.6220179898318342, + "loss": 0.08319669961929321, + "loss_ce": 0.002904945518821478, + "loss_iou": 0.375, + "loss_num": 0.01611328125, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 616912560, + "step": 6362 + }, + { + "epoch": 0.6221157606570199, + "grad_norm": 5.978719974404242, + "learning_rate": 5e-05, + "loss": 0.1009, + "num_input_tokens_seen": 617008784, + "step": 6363 + }, + { + "epoch": 0.6221157606570199, + "loss": 0.07356194406747818, + "loss_ce": 0.004180226009339094, + "loss_iou": 0.318359375, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 617008784, + "step": 6363 + }, + { + "epoch": 0.6222135314822057, + "grad_norm": 4.3376580007270675, + "learning_rate": 5e-05, + "loss": 0.0488, + "num_input_tokens_seen": 617106412, + "step": 6364 + }, + { + "epoch": 0.6222135314822057, + "loss": 0.053718261420726776, + "loss_ce": 0.004447633400559425, + "loss_iou": 0.376953125, + "loss_num": 0.00982666015625, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 617106412, + "step": 6364 + }, + { + "epoch": 0.6223113023073915, + "grad_norm": 6.5721066416489595, + "learning_rate": 5e-05, + "loss": 0.0994, + "num_input_tokens_seen": 617203384, + "step": 6365 + }, + { + "epoch": 0.6223113023073915, + "loss": 0.1346866488456726, + "loss_ce": 0.007687750272452831, + "loss_iou": 0.296875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 617203384, + "step": 6365 + }, + { + "epoch": 0.6224090731325772, + "grad_norm": 10.691639856151388, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 617300188, + "step": 6366 + }, + { + "epoch": 0.6224090731325772, + "loss": 0.08986783027648926, + "loss_ce": 0.0036861891858279705, + "loss_iou": 0.294921875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 617300188, + "step": 6366 + }, + { + "epoch": 0.622506843957763, + "grad_norm": 7.052913507804746, + "learning_rate": 5e-05, + "loss": 0.0695, + "num_input_tokens_seen": 617396452, + "step": 6367 + }, + { + "epoch": 0.622506843957763, + "loss": 0.06126416474580765, + "loss_ce": 0.003460054285824299, + "loss_iou": 0.25390625, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 617396452, + "step": 6367 + }, + { + "epoch": 0.6226046147829488, + "grad_norm": 12.135012679928762, + "learning_rate": 5e-05, + "loss": 0.0937, + "num_input_tokens_seen": 617493056, + "step": 6368 + }, + { + "epoch": 0.6226046147829488, + "loss": 0.11344712972640991, + "loss_ce": 0.014066633768379688, + "loss_iou": 0.306640625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 617493056, + "step": 6368 + }, + { + "epoch": 0.6227023856081345, + "grad_norm": 2.924002929875191, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 617589172, + "step": 6369 + }, + { + "epoch": 0.6227023856081345, + "loss": 0.06898067146539688, + "loss_ce": 0.006106831599026918, + "loss_iou": 0.23828125, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 617589172, + "step": 6369 + }, + { + "epoch": 0.6228001564333203, + "grad_norm": 4.687797265501456, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 617685972, + "step": 6370 + }, + { + "epoch": 0.6228001564333203, + "loss": 0.08985023200511932, + "loss_ce": 0.003958507906645536, + "loss_iou": 0.2197265625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 617685972, + "step": 6370 + }, + { + "epoch": 0.622897927258506, + "grad_norm": 11.071325340503154, + "learning_rate": 5e-05, + "loss": 0.0983, + "num_input_tokens_seen": 617784672, + "step": 6371 + }, + { + "epoch": 0.622897927258506, + "loss": 0.1257512867450714, + "loss_ce": 0.004840639419853687, + "loss_iou": 0.34375, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 617784672, + "step": 6371 + }, + { + "epoch": 0.6229956980836918, + "grad_norm": 7.573557893348777, + "learning_rate": 5e-05, + "loss": 0.0947, + "num_input_tokens_seen": 617881192, + "step": 6372 + }, + { + "epoch": 0.6229956980836918, + "loss": 0.08258230239152908, + "loss_ce": 0.006379908416420221, + "loss_iou": 0.244140625, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 617881192, + "step": 6372 + }, + { + "epoch": 0.6230934689088776, + "grad_norm": 2.7105771885806593, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 617978260, + "step": 6373 + }, + { + "epoch": 0.6230934689088776, + "loss": 0.05656304582953453, + "loss_ce": 0.0015779993264004588, + "loss_iou": 0.27734375, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 617978260, + "step": 6373 + }, + { + "epoch": 0.6231912397340633, + "grad_norm": 9.18751355422497, + "learning_rate": 5e-05, + "loss": 0.0658, + "num_input_tokens_seen": 618075556, + "step": 6374 + }, + { + "epoch": 0.6231912397340633, + "loss": 0.0883212611079216, + "loss_ce": 0.0050082728266716, + "loss_iou": 0.275390625, + "loss_num": 0.0166015625, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 618075556, + "step": 6374 + }, + { + "epoch": 0.6232890105592491, + "grad_norm": 17.690036371802414, + "learning_rate": 5e-05, + "loss": 0.1113, + "num_input_tokens_seen": 618172876, + "step": 6375 + }, + { + "epoch": 0.6232890105592491, + "loss": 0.12874452769756317, + "loss_ce": 0.005880758631974459, + "loss_iou": 0.310546875, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 618172876, + "step": 6375 + }, + { + "epoch": 0.6233867813844349, + "grad_norm": 20.542458488831443, + "learning_rate": 5e-05, + "loss": 0.0964, + "num_input_tokens_seen": 618269932, + "step": 6376 + }, + { + "epoch": 0.6233867813844349, + "loss": 0.1155833899974823, + "loss_ce": 0.0068340059369802475, + "loss_iou": 0.2578125, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 618269932, + "step": 6376 + }, + { + "epoch": 0.6234845522096206, + "grad_norm": 74.58962652504351, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 618367212, + "step": 6377 + }, + { + "epoch": 0.6234845522096206, + "loss": 0.058028444647789, + "loss_ce": 0.0014107086462900043, + "loss_iou": 0.267578125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 618367212, + "step": 6377 + }, + { + "epoch": 0.6235823230348064, + "grad_norm": 41.45308479245761, + "learning_rate": 5e-05, + "loss": 0.1239, + "num_input_tokens_seen": 618463928, + "step": 6378 + }, + { + "epoch": 0.6235823230348064, + "loss": 0.10197466611862183, + "loss_ce": 0.007446468807756901, + "loss_iou": 0.353515625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 618463928, + "step": 6378 + }, + { + "epoch": 0.6236800938599921, + "grad_norm": 24.042683361223936, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 618560096, + "step": 6379 + }, + { + "epoch": 0.6236800938599921, + "loss": 0.0533234104514122, + "loss_ce": 0.004674578085541725, + "loss_iou": 0.25390625, + "loss_num": 0.009765625, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 618560096, + "step": 6379 + }, + { + "epoch": 0.6237778646851779, + "grad_norm": 16.18653703267114, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 618658004, + "step": 6380 + }, + { + "epoch": 0.6237778646851779, + "loss": 0.07688452303409576, + "loss_ce": 0.005374211817979813, + "loss_iou": 0.30859375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 618658004, + "step": 6380 + }, + { + "epoch": 0.6238756355103637, + "grad_norm": 15.50475732621522, + "learning_rate": 5e-05, + "loss": 0.0476, + "num_input_tokens_seen": 618755340, + "step": 6381 + }, + { + "epoch": 0.6238756355103637, + "loss": 0.06327315419912338, + "loss_ce": 0.0008647023933008313, + "loss_iou": 0.30859375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 618755340, + "step": 6381 + }, + { + "epoch": 0.6239734063355494, + "grad_norm": 5.015467169532976, + "learning_rate": 5e-05, + "loss": 0.0847, + "num_input_tokens_seen": 618852000, + "step": 6382 + }, + { + "epoch": 0.6239734063355494, + "loss": 0.12030655145645142, + "loss_ce": 0.004065091721713543, + "loss_iou": 0.337890625, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 618852000, + "step": 6382 + }, + { + "epoch": 0.6240711771607352, + "grad_norm": 3.6094387149577254, + "learning_rate": 5e-05, + "loss": 0.1006, + "num_input_tokens_seen": 618949332, + "step": 6383 + }, + { + "epoch": 0.6240711771607352, + "loss": 0.12204893678426743, + "loss_ce": 0.0043578906916081905, + "loss_iou": 0.314453125, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 618949332, + "step": 6383 + }, + { + "epoch": 0.624168947985921, + "grad_norm": 2.888388752182016, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 619045848, + "step": 6384 + }, + { + "epoch": 0.624168947985921, + "loss": 0.0730350911617279, + "loss_ce": 0.0017155137611553073, + "loss_iou": 0.2734375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 619045848, + "step": 6384 + }, + { + "epoch": 0.6242667188111067, + "grad_norm": 15.589221160707908, + "learning_rate": 5e-05, + "loss": 0.1226, + "num_input_tokens_seen": 619143352, + "step": 6385 + }, + { + "epoch": 0.6242667188111067, + "loss": 0.11835861206054688, + "loss_ce": 0.0037651024758815765, + "loss_iou": 0.458984375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 619143352, + "step": 6385 + }, + { + "epoch": 0.6243644896362925, + "grad_norm": 12.813493165205456, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 619240492, + "step": 6386 + }, + { + "epoch": 0.6243644896362925, + "loss": 0.0932430848479271, + "loss_ce": 0.006237470544874668, + "loss_iou": 0.3515625, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 619240492, + "step": 6386 + }, + { + "epoch": 0.6244622604614783, + "grad_norm": 30.767159744633073, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 619337268, + "step": 6387 + }, + { + "epoch": 0.6244622604614783, + "loss": 0.09543812274932861, + "loss_ce": 0.007532238494604826, + "loss_iou": 0.27734375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 619337268, + "step": 6387 + }, + { + "epoch": 0.624560031286664, + "grad_norm": 14.369205143095659, + "learning_rate": 5e-05, + "loss": 0.1229, + "num_input_tokens_seen": 619433700, + "step": 6388 + }, + { + "epoch": 0.624560031286664, + "loss": 0.1587871015071869, + "loss_ce": 0.0027965055778622627, + "loss_iou": 0.291015625, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 619433700, + "step": 6388 + }, + { + "epoch": 0.6246578021118498, + "grad_norm": 4.172367198737819, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 619529856, + "step": 6389 + }, + { + "epoch": 0.6246578021118498, + "loss": 0.04465874284505844, + "loss_ce": 0.0029030635487288237, + "loss_iou": 0.1572265625, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 619529856, + "step": 6389 + }, + { + "epoch": 0.6247555729370355, + "grad_norm": 8.640860922883338, + "learning_rate": 5e-05, + "loss": 0.1037, + "num_input_tokens_seen": 619626096, + "step": 6390 + }, + { + "epoch": 0.6247555729370355, + "loss": 0.14535799622535706, + "loss_ce": 0.005877403076738119, + "loss_iou": 0.2890625, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 619626096, + "step": 6390 + }, + { + "epoch": 0.6248533437622213, + "grad_norm": 16.45382333095278, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 619723144, + "step": 6391 + }, + { + "epoch": 0.6248533437622213, + "loss": 0.07688125967979431, + "loss_ce": 0.008209080435335636, + "loss_iou": 0.279296875, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 619723144, + "step": 6391 + }, + { + "epoch": 0.6249511145874072, + "grad_norm": 15.789151312043792, + "learning_rate": 5e-05, + "loss": 0.0688, + "num_input_tokens_seen": 619820712, + "step": 6392 + }, + { + "epoch": 0.6249511145874072, + "loss": 0.06400705128908157, + "loss_ce": 0.006077059544622898, + "loss_iou": 0.353515625, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 619820712, + "step": 6392 + }, + { + "epoch": 0.6250488854125928, + "grad_norm": 3.604676870879048, + "learning_rate": 5e-05, + "loss": 0.0988, + "num_input_tokens_seen": 619918136, + "step": 6393 + }, + { + "epoch": 0.6250488854125928, + "loss": 0.04165040701627731, + "loss_ce": 0.0056549254804849625, + "loss_iou": 0.36328125, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 619918136, + "step": 6393 + }, + { + "epoch": 0.6251466562377787, + "grad_norm": 234.32808526944672, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 620014724, + "step": 6394 + }, + { + "epoch": 0.6251466562377787, + "loss": 0.06654079258441925, + "loss_ce": 0.007115439511835575, + "loss_iou": 0.302734375, + "loss_num": 0.01190185546875, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 620014724, + "step": 6394 + }, + { + "epoch": 0.6252444270629645, + "grad_norm": 9.200496881266194, + "learning_rate": 5e-05, + "loss": 0.1071, + "num_input_tokens_seen": 620111344, + "step": 6395 + }, + { + "epoch": 0.6252444270629645, + "loss": 0.11692960560321808, + "loss_ce": 0.005662514828145504, + "loss_iou": 0.279296875, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 620111344, + "step": 6395 + }, + { + "epoch": 0.6253421978881502, + "grad_norm": 7.82326887355235, + "learning_rate": 5e-05, + "loss": 0.0986, + "num_input_tokens_seen": 620207768, + "step": 6396 + }, + { + "epoch": 0.6253421978881502, + "loss": 0.10483158379793167, + "loss_ce": 0.00645817257463932, + "loss_iou": 0.263671875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 620207768, + "step": 6396 + }, + { + "epoch": 0.625439968713336, + "grad_norm": 8.858998059992043, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 620305660, + "step": 6397 + }, + { + "epoch": 0.625439968713336, + "loss": 0.05965602397918701, + "loss_ce": 0.0028628110885620117, + "loss_iou": 0.33203125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 620305660, + "step": 6397 + }, + { + "epoch": 0.6255377395385217, + "grad_norm": 7.8059074078711745, + "learning_rate": 5e-05, + "loss": 0.1072, + "num_input_tokens_seen": 620402508, + "step": 6398 + }, + { + "epoch": 0.6255377395385217, + "loss": 0.10842324793338776, + "loss_ce": 0.008874908089637756, + "loss_iou": 0.4296875, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 620402508, + "step": 6398 + }, + { + "epoch": 0.6256355103637075, + "grad_norm": 5.989624445715544, + "learning_rate": 5e-05, + "loss": 0.0913, + "num_input_tokens_seen": 620500632, + "step": 6399 + }, + { + "epoch": 0.6256355103637075, + "loss": 0.12150508165359497, + "loss_ce": 0.0074914065189659595, + "loss_iou": 0.333984375, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 620500632, + "step": 6399 + }, + { + "epoch": 0.6257332811888933, + "grad_norm": 2.737099742287842, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 620597736, + "step": 6400 + }, + { + "epoch": 0.6257332811888933, + "loss": 0.08694691956043243, + "loss_ce": 0.0042900629341602325, + "loss_iou": 0.35546875, + "loss_num": 0.0166015625, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 620597736, + "step": 6400 + }, + { + "epoch": 0.625831052014079, + "grad_norm": 9.895264870136838, + "learning_rate": 5e-05, + "loss": 0.1333, + "num_input_tokens_seen": 620695448, + "step": 6401 + }, + { + "epoch": 0.625831052014079, + "loss": 0.0972970649600029, + "loss_ce": 0.007209174335002899, + "loss_iou": 0.294921875, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 620695448, + "step": 6401 + }, + { + "epoch": 0.6259288228392648, + "grad_norm": 6.215269200775689, + "learning_rate": 5e-05, + "loss": 0.0922, + "num_input_tokens_seen": 620792004, + "step": 6402 + }, + { + "epoch": 0.6259288228392648, + "loss": 0.09095907211303711, + "loss_ce": 0.009820463135838509, + "loss_iou": 0.25, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 620792004, + "step": 6402 + }, + { + "epoch": 0.6260265936644506, + "grad_norm": 5.236547947906308, + "learning_rate": 5e-05, + "loss": 0.0838, + "num_input_tokens_seen": 620888840, + "step": 6403 + }, + { + "epoch": 0.6260265936644506, + "loss": 0.09644252061843872, + "loss_ce": 0.007407492958009243, + "loss_iou": 0.3828125, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 620888840, + "step": 6403 + }, + { + "epoch": 0.6261243644896363, + "grad_norm": 4.465142319081831, + "learning_rate": 5e-05, + "loss": 0.0729, + "num_input_tokens_seen": 620986496, + "step": 6404 + }, + { + "epoch": 0.6261243644896363, + "loss": 0.05264444649219513, + "loss_ce": 0.003312782384455204, + "loss_iou": 0.435546875, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 620986496, + "step": 6404 + }, + { + "epoch": 0.6262221353148221, + "grad_norm": 4.136237708619824, + "learning_rate": 5e-05, + "loss": 0.1011, + "num_input_tokens_seen": 621084080, + "step": 6405 + }, + { + "epoch": 0.6262221353148221, + "loss": 0.08790412545204163, + "loss_ce": 0.0053845965303480625, + "loss_iou": 0.40234375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 621084080, + "step": 6405 + }, + { + "epoch": 0.6263199061400079, + "grad_norm": 2.5914777997341196, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 621180148, + "step": 6406 + }, + { + "epoch": 0.6263199061400079, + "loss": 0.05427316576242447, + "loss_ce": 0.018224280327558517, + "loss_iou": 0.208984375, + "loss_num": 0.0072021484375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 621180148, + "step": 6406 + }, + { + "epoch": 0.6264176769651936, + "grad_norm": 3.368471175571595, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 621276020, + "step": 6407 + }, + { + "epoch": 0.6264176769651936, + "loss": 0.08475249260663986, + "loss_ce": 0.006108692381531, + "loss_iou": 0.240234375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 621276020, + "step": 6407 + }, + { + "epoch": 0.6265154477903794, + "grad_norm": 4.153052131074102, + "learning_rate": 5e-05, + "loss": 0.075, + "num_input_tokens_seen": 621373068, + "step": 6408 + }, + { + "epoch": 0.6265154477903794, + "loss": 0.07366696000099182, + "loss_ce": 0.007459068670868874, + "loss_iou": 0.3515625, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 621373068, + "step": 6408 + }, + { + "epoch": 0.6266132186155651, + "grad_norm": 10.798045187018094, + "learning_rate": 5e-05, + "loss": 0.0992, + "num_input_tokens_seen": 621471244, + "step": 6409 + }, + { + "epoch": 0.6266132186155651, + "loss": 0.138790100812912, + "loss_ce": 0.006252266000956297, + "loss_iou": 0.330078125, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 621471244, + "step": 6409 + }, + { + "epoch": 0.6267109894407509, + "grad_norm": 9.72675136047577, + "learning_rate": 5e-05, + "loss": 0.107, + "num_input_tokens_seen": 621567528, + "step": 6410 + }, + { + "epoch": 0.6267109894407509, + "loss": 0.11823554337024689, + "loss_ce": 0.0049237716011703014, + "loss_iou": 0.353515625, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 621567528, + "step": 6410 + }, + { + "epoch": 0.6268087602659367, + "grad_norm": 9.543286447110173, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 621665820, + "step": 6411 + }, + { + "epoch": 0.6268087602659367, + "loss": 0.09142764657735825, + "loss_ce": 0.006665071938186884, + "loss_iou": 0.337890625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 621665820, + "step": 6411 + }, + { + "epoch": 0.6269065310911224, + "grad_norm": 3.7531713244204057, + "learning_rate": 5e-05, + "loss": 0.1061, + "num_input_tokens_seen": 621761692, + "step": 6412 + }, + { + "epoch": 0.6269065310911224, + "loss": 0.09865238517522812, + "loss_ce": 0.003376502776518464, + "loss_iou": 0.296875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 621761692, + "step": 6412 + }, + { + "epoch": 0.6270043019163082, + "grad_norm": 4.953179955083765, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 621858452, + "step": 6413 + }, + { + "epoch": 0.6270043019163082, + "loss": 0.057903558015823364, + "loss_ce": 0.00351360603235662, + "loss_iou": 0.1572265625, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 621858452, + "step": 6413 + }, + { + "epoch": 0.627102072741494, + "grad_norm": 2.0283501479924193, + "learning_rate": 5e-05, + "loss": 0.0488, + "num_input_tokens_seen": 621955032, + "step": 6414 + }, + { + "epoch": 0.627102072741494, + "loss": 0.06040485203266144, + "loss_ce": 0.0017958423122763634, + "loss_iou": 0.1845703125, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 621955032, + "step": 6414 + }, + { + "epoch": 0.6271998435666797, + "grad_norm": 13.002623498204441, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 622052068, + "step": 6415 + }, + { + "epoch": 0.6271998435666797, + "loss": 0.1035630851984024, + "loss_ce": 0.0072038304060697556, + "loss_iou": 0.28125, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 622052068, + "step": 6415 + }, + { + "epoch": 0.6272976143918655, + "grad_norm": 18.6674044663676, + "learning_rate": 5e-05, + "loss": 0.0522, + "num_input_tokens_seen": 622148600, + "step": 6416 + }, + { + "epoch": 0.6272976143918655, + "loss": 0.05572137236595154, + "loss_ce": 0.00581368850544095, + "loss_iou": 0.2294921875, + "loss_num": 0.010009765625, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 622148600, + "step": 6416 + }, + { + "epoch": 0.6273953852170512, + "grad_norm": 2.4143793287797086, + "learning_rate": 5e-05, + "loss": 0.1052, + "num_input_tokens_seen": 622245700, + "step": 6417 + }, + { + "epoch": 0.6273953852170512, + "loss": 0.11349131166934967, + "loss_ce": 0.0044520036317408085, + "loss_iou": 0.25, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 622245700, + "step": 6417 + }, + { + "epoch": 0.627493156042237, + "grad_norm": 6.062821291218981, + "learning_rate": 5e-05, + "loss": 0.1044, + "num_input_tokens_seen": 622342096, + "step": 6418 + }, + { + "epoch": 0.627493156042237, + "loss": 0.07634051889181137, + "loss_ce": 0.007691223174333572, + "loss_iou": 0.234375, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 622342096, + "step": 6418 + }, + { + "epoch": 0.6275909268674228, + "grad_norm": 6.258085889654832, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 622439368, + "step": 6419 + }, + { + "epoch": 0.6275909268674228, + "loss": 0.07766345143318176, + "loss_ce": 0.004352604039013386, + "loss_iou": 0.263671875, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 622439368, + "step": 6419 + }, + { + "epoch": 0.6276886976926085, + "grad_norm": 3.003024548687137, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 622535372, + "step": 6420 + }, + { + "epoch": 0.6276886976926085, + "loss": 0.07660893350839615, + "loss_ce": 0.004778184462338686, + "loss_iou": 0.236328125, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 622535372, + "step": 6420 + }, + { + "epoch": 0.6277864685177943, + "grad_norm": 10.488227129832469, + "learning_rate": 5e-05, + "loss": 0.0879, + "num_input_tokens_seen": 622632244, + "step": 6421 + }, + { + "epoch": 0.6277864685177943, + "loss": 0.08278297632932663, + "loss_ce": 0.004627461079508066, + "loss_iou": 0.2392578125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 622632244, + "step": 6421 + }, + { + "epoch": 0.6278842393429801, + "grad_norm": 1.9768945219331255, + "learning_rate": 5e-05, + "loss": 0.0863, + "num_input_tokens_seen": 622728540, + "step": 6422 + }, + { + "epoch": 0.6278842393429801, + "loss": 0.09320992231369019, + "loss_ce": 0.0037171179428696632, + "loss_iou": 0.306640625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 622728540, + "step": 6422 + }, + { + "epoch": 0.6279820101681658, + "grad_norm": 4.3442165583416665, + "learning_rate": 5e-05, + "loss": 0.038, + "num_input_tokens_seen": 622825300, + "step": 6423 + }, + { + "epoch": 0.6279820101681658, + "loss": 0.03697381168603897, + "loss_ce": 0.003557064803317189, + "loss_iou": 0.279296875, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 622825300, + "step": 6423 + }, + { + "epoch": 0.6280797809933516, + "grad_norm": 15.389414815131765, + "learning_rate": 5e-05, + "loss": 0.0595, + "num_input_tokens_seen": 622922948, + "step": 6424 + }, + { + "epoch": 0.6280797809933516, + "loss": 0.05747969448566437, + "loss_ce": 0.006584006827324629, + "loss_iou": 0.318359375, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 622922948, + "step": 6424 + }, + { + "epoch": 0.6281775518185373, + "grad_norm": 10.141004254837467, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 623019620, + "step": 6425 + }, + { + "epoch": 0.6281775518185373, + "loss": 0.0710325688123703, + "loss_ce": 0.006823577452450991, + "loss_iou": 0.37109375, + "loss_num": 0.0128173828125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 623019620, + "step": 6425 + }, + { + "epoch": 0.6282753226437231, + "grad_norm": 7.046061843604366, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 623116244, + "step": 6426 + }, + { + "epoch": 0.6282753226437231, + "loss": 0.06116719916462898, + "loss_ce": 0.0030591085087507963, + "loss_iou": 0.2109375, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 623116244, + "step": 6426 + }, + { + "epoch": 0.6283730934689089, + "grad_norm": 13.68738500717348, + "learning_rate": 5e-05, + "loss": 0.0443, + "num_input_tokens_seen": 623213452, + "step": 6427 + }, + { + "epoch": 0.6283730934689089, + "loss": 0.04221007600426674, + "loss_ce": 0.008282160386443138, + "loss_iou": 0.36328125, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 623213452, + "step": 6427 + }, + { + "epoch": 0.6284708642940946, + "grad_norm": 16.32963930035711, + "learning_rate": 5e-05, + "loss": 0.0763, + "num_input_tokens_seen": 623309544, + "step": 6428 + }, + { + "epoch": 0.6284708642940946, + "loss": 0.0914618968963623, + "loss_ce": 0.006134752184152603, + "loss_iou": 0.1953125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 623309544, + "step": 6428 + }, + { + "epoch": 0.6285686351192804, + "grad_norm": 9.093477068691362, + "learning_rate": 5e-05, + "loss": 0.0726, + "num_input_tokens_seen": 623406924, + "step": 6429 + }, + { + "epoch": 0.6285686351192804, + "loss": 0.08283483237028122, + "loss_ce": 0.00710546039044857, + "loss_iou": 0.318359375, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 623406924, + "step": 6429 + }, + { + "epoch": 0.6286664059444662, + "grad_norm": 5.9727106031999195, + "learning_rate": 5e-05, + "loss": 0.0552, + "num_input_tokens_seen": 623504012, + "step": 6430 + }, + { + "epoch": 0.6286664059444662, + "loss": 0.03730776906013489, + "loss_ce": 0.0029144571162760258, + "loss_iou": 0.369140625, + "loss_num": 0.006866455078125, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 623504012, + "step": 6430 + }, + { + "epoch": 0.6287641767696519, + "grad_norm": 8.192243165181704, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 623600940, + "step": 6431 + }, + { + "epoch": 0.6287641767696519, + "loss": 0.0564313568174839, + "loss_ce": 0.007347646169364452, + "loss_iou": 0.302734375, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 623600940, + "step": 6431 + }, + { + "epoch": 0.6288619475948377, + "grad_norm": 11.840269957735028, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 623698124, + "step": 6432 + }, + { + "epoch": 0.6288619475948377, + "loss": 0.08377750217914581, + "loss_ce": 0.006476474925875664, + "loss_iou": 0.416015625, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 623698124, + "step": 6432 + }, + { + "epoch": 0.6289597184200235, + "grad_norm": 6.510834788223811, + "learning_rate": 5e-05, + "loss": 0.099, + "num_input_tokens_seen": 623794732, + "step": 6433 + }, + { + "epoch": 0.6289597184200235, + "loss": 0.11747278273105621, + "loss_ce": 0.004649299196898937, + "loss_iou": 0.2578125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 623794732, + "step": 6433 + }, + { + "epoch": 0.6290574892452092, + "grad_norm": 7.136407147033829, + "learning_rate": 5e-05, + "loss": 0.0917, + "num_input_tokens_seen": 623891588, + "step": 6434 + }, + { + "epoch": 0.6290574892452092, + "loss": 0.08594825118780136, + "loss_ce": 0.002665781881660223, + "loss_iou": 0.33203125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 623891588, + "step": 6434 + }, + { + "epoch": 0.629155260070395, + "grad_norm": 11.134998691109564, + "learning_rate": 5e-05, + "loss": 0.0931, + "num_input_tokens_seen": 623988548, + "step": 6435 + }, + { + "epoch": 0.629155260070395, + "loss": 0.09404550492763519, + "loss_ce": 0.002607214031741023, + "loss_iou": 0.2890625, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 623988548, + "step": 6435 + }, + { + "epoch": 0.6292530308955807, + "grad_norm": 72.05778431194535, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 624084708, + "step": 6436 + }, + { + "epoch": 0.6292530308955807, + "loss": 0.08584250509738922, + "loss_ce": 0.004845032934099436, + "loss_iou": 0.232421875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 624084708, + "step": 6436 + }, + { + "epoch": 0.6293508017207665, + "grad_norm": 28.176794513056187, + "learning_rate": 5e-05, + "loss": 0.0951, + "num_input_tokens_seen": 624181236, + "step": 6437 + }, + { + "epoch": 0.6293508017207665, + "loss": 0.08523516356945038, + "loss_ce": 0.006259481888264418, + "loss_iou": 0.294921875, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 624181236, + "step": 6437 + }, + { + "epoch": 0.6294485725459523, + "grad_norm": 21.31108650652104, + "learning_rate": 5e-05, + "loss": 0.0952, + "num_input_tokens_seen": 624278552, + "step": 6438 + }, + { + "epoch": 0.6294485725459523, + "loss": 0.08402693271636963, + "loss_ce": 0.006725911982357502, + "loss_iou": 0.39453125, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 624278552, + "step": 6438 + }, + { + "epoch": 0.629546343371138, + "grad_norm": 5.508692753643831, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 624375200, + "step": 6439 + }, + { + "epoch": 0.629546343371138, + "loss": 0.07966267317533493, + "loss_ce": 0.006069535855203867, + "loss_iou": 0.30859375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 624375200, + "step": 6439 + }, + { + "epoch": 0.6296441141963238, + "grad_norm": 3.178474511451405, + "learning_rate": 5e-05, + "loss": 0.0974, + "num_input_tokens_seen": 624472200, + "step": 6440 + }, + { + "epoch": 0.6296441141963238, + "loss": 0.07470352202653885, + "loss_ce": 0.0025599654763936996, + "loss_iou": 0.283203125, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 624472200, + "step": 6440 + }, + { + "epoch": 0.6297418850215096, + "grad_norm": 22.52560170648612, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 624568808, + "step": 6441 + }, + { + "epoch": 0.6297418850215096, + "loss": 0.08471526205539703, + "loss_ce": 0.006101979874074459, + "loss_iou": 0.2890625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 624568808, + "step": 6441 + }, + { + "epoch": 0.6298396558466953, + "grad_norm": 10.076335252411422, + "learning_rate": 5e-05, + "loss": 0.099, + "num_input_tokens_seen": 624666564, + "step": 6442 + }, + { + "epoch": 0.6298396558466953, + "loss": 0.09543415904045105, + "loss_ce": 0.007726635783910751, + "loss_iou": 0.34375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 624666564, + "step": 6442 + }, + { + "epoch": 0.6299374266718811, + "grad_norm": 7.532063538013871, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 624763088, + "step": 6443 + }, + { + "epoch": 0.6299374266718811, + "loss": 0.058944880962371826, + "loss_ce": 0.004760921001434326, + "loss_iou": 0.37890625, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 624763088, + "step": 6443 + }, + { + "epoch": 0.6300351974970668, + "grad_norm": 4.6981447709205675, + "learning_rate": 5e-05, + "loss": 0.0968, + "num_input_tokens_seen": 624859516, + "step": 6444 + }, + { + "epoch": 0.6300351974970668, + "loss": 0.0846557691693306, + "loss_ce": 0.005355844274163246, + "loss_iou": 0.25390625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 624859516, + "step": 6444 + }, + { + "epoch": 0.6301329683222526, + "grad_norm": 10.405116088104345, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 624956496, + "step": 6445 + }, + { + "epoch": 0.6301329683222526, + "loss": 0.075706847012043, + "loss_ce": 0.0084537323564291, + "loss_iou": 0.2734375, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 624956496, + "step": 6445 + }, + { + "epoch": 0.6302307391474384, + "grad_norm": 2.9295355222427593, + "learning_rate": 5e-05, + "loss": 0.0772, + "num_input_tokens_seen": 625052872, + "step": 6446 + }, + { + "epoch": 0.6302307391474384, + "loss": 0.054577723145484924, + "loss_ce": 0.00376595975831151, + "loss_iou": 0.294921875, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 625052872, + "step": 6446 + }, + { + "epoch": 0.6303285099726241, + "grad_norm": 4.072810765318111, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 625150244, + "step": 6447 + }, + { + "epoch": 0.6303285099726241, + "loss": 0.04209395870566368, + "loss_ce": 0.004587855190038681, + "loss_iou": 0.34765625, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 625150244, + "step": 6447 + }, + { + "epoch": 0.6304262807978099, + "grad_norm": 3.5374275876948165, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 625247312, + "step": 6448 + }, + { + "epoch": 0.6304262807978099, + "loss": 0.0982307493686676, + "loss_ce": 0.003870400134474039, + "loss_iou": 0.330078125, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 625247312, + "step": 6448 + }, + { + "epoch": 0.6305240516229957, + "grad_norm": 2.759463691539506, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 625344192, + "step": 6449 + }, + { + "epoch": 0.6305240516229957, + "loss": 0.08348174393177032, + "loss_ce": 0.0038003495428711176, + "loss_iou": 0.23828125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 625344192, + "step": 6449 + }, + { + "epoch": 0.6306218224481814, + "grad_norm": 3.2018757596695337, + "learning_rate": 5e-05, + "loss": 0.083, + "num_input_tokens_seen": 625441116, + "step": 6450 + }, + { + "epoch": 0.6306218224481814, + "loss": 0.0701296478509903, + "loss_ce": 0.003998058848083019, + "loss_iou": 0.271484375, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 625441116, + "step": 6450 + }, + { + "epoch": 0.6307195932733672, + "grad_norm": 12.678234970724086, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 625538256, + "step": 6451 + }, + { + "epoch": 0.6307195932733672, + "loss": 0.07916639000177383, + "loss_ce": 0.006809213664382696, + "loss_iou": 0.328125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 625538256, + "step": 6451 + }, + { + "epoch": 0.630817364098553, + "grad_norm": 10.553521021996843, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 625634864, + "step": 6452 + }, + { + "epoch": 0.630817364098553, + "loss": 0.07407626509666443, + "loss_ce": 0.004145241342484951, + "loss_iou": 0.20703125, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 625634864, + "step": 6452 + }, + { + "epoch": 0.6309151349237387, + "grad_norm": 1.812226704467405, + "learning_rate": 5e-05, + "loss": 0.0772, + "num_input_tokens_seen": 625731168, + "step": 6453 + }, + { + "epoch": 0.6309151349237387, + "loss": 0.06687857210636139, + "loss_ce": 0.0040123616345226765, + "loss_iou": 0.2265625, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 625731168, + "step": 6453 + }, + { + "epoch": 0.6310129057489245, + "grad_norm": 8.720054079805903, + "learning_rate": 5e-05, + "loss": 0.1031, + "num_input_tokens_seen": 625827544, + "step": 6454 + }, + { + "epoch": 0.6310129057489245, + "loss": 0.15715087950229645, + "loss_ce": 0.013474119827151299, + "loss_iou": 0.2490234375, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 625827544, + "step": 6454 + }, + { + "epoch": 0.6311106765741102, + "grad_norm": 5.603561132986548, + "learning_rate": 5e-05, + "loss": 0.0787, + "num_input_tokens_seen": 625924504, + "step": 6455 + }, + { + "epoch": 0.6311106765741102, + "loss": 0.0712691992521286, + "loss_ce": 0.003413365688174963, + "loss_iou": 0.2060546875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 625924504, + "step": 6455 + }, + { + "epoch": 0.631208447399296, + "grad_norm": 3.9201018495303224, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 626021536, + "step": 6456 + }, + { + "epoch": 0.631208447399296, + "loss": 0.09336888790130615, + "loss_ce": 0.006180170923471451, + "loss_iou": 0.244140625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 626021536, + "step": 6456 + }, + { + "epoch": 0.6313062182244819, + "grad_norm": 4.6672638018280646, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 626119188, + "step": 6457 + }, + { + "epoch": 0.6313062182244819, + "loss": 0.06043541803956032, + "loss_ce": 0.004016045015305281, + "loss_iou": 0.35546875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 626119188, + "step": 6457 + }, + { + "epoch": 0.6314039890496675, + "grad_norm": 3.156408737412138, + "learning_rate": 5e-05, + "loss": 0.0622, + "num_input_tokens_seen": 626215920, + "step": 6458 + }, + { + "epoch": 0.6314039890496675, + "loss": 0.05463489890098572, + "loss_ce": 0.007279244251549244, + "loss_iou": 0.24609375, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 626215920, + "step": 6458 + }, + { + "epoch": 0.6315017598748534, + "grad_norm": 31.89142593304038, + "learning_rate": 5e-05, + "loss": 0.1094, + "num_input_tokens_seen": 626312692, + "step": 6459 + }, + { + "epoch": 0.6315017598748534, + "loss": 0.12344594299793243, + "loss_ce": 0.006624658592045307, + "loss_iou": 0.28125, + "loss_num": 0.0234375, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 626312692, + "step": 6459 + }, + { + "epoch": 0.6315995307000392, + "grad_norm": 172.4670187642755, + "learning_rate": 5e-05, + "loss": 0.1186, + "num_input_tokens_seen": 626410600, + "step": 6460 + }, + { + "epoch": 0.6315995307000392, + "loss": 0.16635531187057495, + "loss_ce": 0.007358722388744354, + "loss_iou": 0.341796875, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 626410600, + "step": 6460 + }, + { + "epoch": 0.6316973015252249, + "grad_norm": 29.02167942779421, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 626507732, + "step": 6461 + }, + { + "epoch": 0.6316973015252249, + "loss": 0.06957779079675674, + "loss_ce": 0.011045078746974468, + "loss_iou": 0.271484375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 626507732, + "step": 6461 + }, + { + "epoch": 0.6317950723504107, + "grad_norm": 11.265369091075272, + "learning_rate": 5e-05, + "loss": 0.1041, + "num_input_tokens_seen": 626604796, + "step": 6462 + }, + { + "epoch": 0.6317950723504107, + "loss": 0.09732451289892197, + "loss_ce": 0.00467314338311553, + "loss_iou": 0.3203125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 626604796, + "step": 6462 + }, + { + "epoch": 0.6318928431755964, + "grad_norm": 5.956936023044149, + "learning_rate": 5e-05, + "loss": 0.1051, + "num_input_tokens_seen": 626702204, + "step": 6463 + }, + { + "epoch": 0.6318928431755964, + "loss": 0.07463550567626953, + "loss_ce": 0.004834173247218132, + "loss_iou": 0.302734375, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 626702204, + "step": 6463 + }, + { + "epoch": 0.6319906140007822, + "grad_norm": 3.174538322918183, + "learning_rate": 5e-05, + "loss": 0.0963, + "num_input_tokens_seen": 626799200, + "step": 6464 + }, + { + "epoch": 0.6319906140007822, + "loss": 0.06892389804124832, + "loss_ce": 0.002098031109198928, + "loss_iou": 0.2138671875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 626799200, + "step": 6464 + }, + { + "epoch": 0.632088384825968, + "grad_norm": 9.187087820920885, + "learning_rate": 5e-05, + "loss": 0.0638, + "num_input_tokens_seen": 626897000, + "step": 6465 + }, + { + "epoch": 0.632088384825968, + "loss": 0.07042605429887772, + "loss_ce": 0.004904808942228556, + "loss_iou": 0.314453125, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 626897000, + "step": 6465 + }, + { + "epoch": 0.6321861556511537, + "grad_norm": 4.620745687427967, + "learning_rate": 5e-05, + "loss": 0.1045, + "num_input_tokens_seen": 626994408, + "step": 6466 + }, + { + "epoch": 0.6321861556511537, + "loss": 0.10512635111808777, + "loss_ce": 0.008721325546503067, + "loss_iou": 0.349609375, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 626994408, + "step": 6466 + }, + { + "epoch": 0.6322839264763395, + "grad_norm": 16.86415966060157, + "learning_rate": 5e-05, + "loss": 0.1362, + "num_input_tokens_seen": 627090648, + "step": 6467 + }, + { + "epoch": 0.6322839264763395, + "loss": 0.08023670315742493, + "loss_ce": 0.0038607404567301273, + "loss_iou": 0.263671875, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 627090648, + "step": 6467 + }, + { + "epoch": 0.6323816973015253, + "grad_norm": 7.049928781890105, + "learning_rate": 5e-05, + "loss": 0.0587, + "num_input_tokens_seen": 627188672, + "step": 6468 + }, + { + "epoch": 0.6323816973015253, + "loss": 0.05403111129999161, + "loss_ce": 0.0023648496717214584, + "loss_iou": 0.228515625, + "loss_num": 0.01031494140625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 627188672, + "step": 6468 + }, + { + "epoch": 0.632479468126711, + "grad_norm": 5.001245869094783, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 627285616, + "step": 6469 + }, + { + "epoch": 0.632479468126711, + "loss": 0.07100673019886017, + "loss_ce": 0.0063704936765134335, + "loss_iou": 0.302734375, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 627285616, + "step": 6469 + }, + { + "epoch": 0.6325772389518968, + "grad_norm": 4.700643727923067, + "learning_rate": 5e-05, + "loss": 0.1167, + "num_input_tokens_seen": 627382100, + "step": 6470 + }, + { + "epoch": 0.6325772389518968, + "loss": 0.1209157258272171, + "loss_ce": 0.01076253317296505, + "loss_iou": 0.23828125, + "loss_num": 0.02197265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 627382100, + "step": 6470 + }, + { + "epoch": 0.6326750097770825, + "grad_norm": 13.886264340320274, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 627479064, + "step": 6471 + }, + { + "epoch": 0.6326750097770825, + "loss": 0.07139936834573746, + "loss_ce": 0.0016361847519874573, + "loss_iou": 0.3125, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 627479064, + "step": 6471 + }, + { + "epoch": 0.6327727806022683, + "grad_norm": 15.573165358831028, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 627575048, + "step": 6472 + }, + { + "epoch": 0.6327727806022683, + "loss": 0.06190664321184158, + "loss_ce": 0.004144497215747833, + "loss_iou": 0.18359375, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 627575048, + "step": 6472 + }, + { + "epoch": 0.6328705514274541, + "grad_norm": 14.373672024445227, + "learning_rate": 5e-05, + "loss": 0.0775, + "num_input_tokens_seen": 627672076, + "step": 6473 + }, + { + "epoch": 0.6328705514274541, + "loss": 0.065865159034729, + "loss_ce": 0.002197862835600972, + "loss_iou": 0.337890625, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 627672076, + "step": 6473 + }, + { + "epoch": 0.6329683222526398, + "grad_norm": 7.389525313551636, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 627769640, + "step": 6474 + }, + { + "epoch": 0.6329683222526398, + "loss": 0.08995390683412552, + "loss_ce": 0.006579883396625519, + "loss_iou": 0.341796875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 627769640, + "step": 6474 + }, + { + "epoch": 0.6330660930778256, + "grad_norm": 11.74487113757929, + "learning_rate": 5e-05, + "loss": 0.0685, + "num_input_tokens_seen": 627866248, + "step": 6475 + }, + { + "epoch": 0.6330660930778256, + "loss": 0.0864148885011673, + "loss_ce": 0.005238130688667297, + "loss_iou": 0.30859375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 627866248, + "step": 6475 + }, + { + "epoch": 0.6331638639030114, + "grad_norm": 43.15714359467391, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 627963164, + "step": 6476 + }, + { + "epoch": 0.6331638639030114, + "loss": 0.06901402771472931, + "loss_ce": 0.0034470073878765106, + "loss_iou": 0.236328125, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 627963164, + "step": 6476 + }, + { + "epoch": 0.6332616347281971, + "grad_norm": 6.627006588922846, + "learning_rate": 5e-05, + "loss": 0.0871, + "num_input_tokens_seen": 628059568, + "step": 6477 + }, + { + "epoch": 0.6332616347281971, + "loss": 0.05385766178369522, + "loss_ce": 0.004258973523974419, + "loss_iou": 0.2001953125, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 628059568, + "step": 6477 + }, + { + "epoch": 0.6333594055533829, + "grad_norm": 5.705420877328226, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 628155652, + "step": 6478 + }, + { + "epoch": 0.6333594055533829, + "loss": 0.050111547112464905, + "loss_ce": 0.005685583688318729, + "loss_iou": 0.271484375, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 628155652, + "step": 6478 + }, + { + "epoch": 0.6334571763785687, + "grad_norm": 9.270743266930454, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 628252620, + "step": 6479 + }, + { + "epoch": 0.6334571763785687, + "loss": 0.06180327385663986, + "loss_ce": 0.007298882119357586, + "loss_iou": 0.2421875, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 628252620, + "step": 6479 + }, + { + "epoch": 0.6335549472037544, + "grad_norm": 4.220095621282741, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 628349924, + "step": 6480 + }, + { + "epoch": 0.6335549472037544, + "loss": 0.10155823081731796, + "loss_ce": 0.004542844370007515, + "loss_iou": 0.357421875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 628349924, + "step": 6480 + }, + { + "epoch": 0.6336527180289402, + "grad_norm": 5.190353535737397, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 628446480, + "step": 6481 + }, + { + "epoch": 0.6336527180289402, + "loss": 0.06549523770809174, + "loss_ce": 0.003361445851624012, + "loss_iou": 0.26953125, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 628446480, + "step": 6481 + }, + { + "epoch": 0.6337504888541259, + "grad_norm": 7.220348270743532, + "learning_rate": 5e-05, + "loss": 0.0502, + "num_input_tokens_seen": 628544308, + "step": 6482 + }, + { + "epoch": 0.6337504888541259, + "loss": 0.07053543627262115, + "loss_ce": 0.004861602559685707, + "loss_iou": 0.38671875, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 628544308, + "step": 6482 + }, + { + "epoch": 0.6338482596793117, + "grad_norm": 12.328600481158158, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 628641284, + "step": 6483 + }, + { + "epoch": 0.6338482596793117, + "loss": 0.07961545884609222, + "loss_ce": 0.010325297713279724, + "loss_iou": 0.33984375, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 628641284, + "step": 6483 + }, + { + "epoch": 0.6339460305044975, + "grad_norm": 28.06034674413834, + "learning_rate": 5e-05, + "loss": 0.1044, + "num_input_tokens_seen": 628739256, + "step": 6484 + }, + { + "epoch": 0.6339460305044975, + "loss": 0.11059236526489258, + "loss_ce": 0.008160123601555824, + "loss_iou": 0.279296875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 628739256, + "step": 6484 + }, + { + "epoch": 0.6340438013296832, + "grad_norm": 6.5934371828420115, + "learning_rate": 5e-05, + "loss": 0.1252, + "num_input_tokens_seen": 628835624, + "step": 6485 + }, + { + "epoch": 0.6340438013296832, + "loss": 0.10082249343395233, + "loss_ce": 0.005723994225263596, + "loss_iou": 0.328125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 628835624, + "step": 6485 + }, + { + "epoch": 0.634141572154869, + "grad_norm": 22.81170235148348, + "learning_rate": 5e-05, + "loss": 0.1071, + "num_input_tokens_seen": 628933064, + "step": 6486 + }, + { + "epoch": 0.634141572154869, + "loss": 0.11355311423540115, + "loss_ce": 0.003766132052987814, + "loss_iou": 0.279296875, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 628933064, + "step": 6486 + }, + { + "epoch": 0.6342393429800548, + "grad_norm": 11.213988217539566, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 629029876, + "step": 6487 + }, + { + "epoch": 0.6342393429800548, + "loss": 0.07586327940225601, + "loss_ce": 0.006252681836485863, + "loss_iou": 0.337890625, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 629029876, + "step": 6487 + }, + { + "epoch": 0.6343371138052405, + "grad_norm": 5.092952577387985, + "learning_rate": 5e-05, + "loss": 0.0968, + "num_input_tokens_seen": 629126236, + "step": 6488 + }, + { + "epoch": 0.6343371138052405, + "loss": 0.12457121908664703, + "loss_ce": 0.004804984666407108, + "loss_iou": 0.25390625, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 629126236, + "step": 6488 + }, + { + "epoch": 0.6344348846304263, + "grad_norm": 1.9376722006212144, + "learning_rate": 5e-05, + "loss": 0.0554, + "num_input_tokens_seen": 629222680, + "step": 6489 + }, + { + "epoch": 0.6344348846304263, + "loss": 0.06544347107410431, + "loss_ce": 0.00533146969974041, + "loss_iou": 0.1884765625, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 629222680, + "step": 6489 + }, + { + "epoch": 0.634532655455612, + "grad_norm": 15.544946918151473, + "learning_rate": 5e-05, + "loss": 0.0763, + "num_input_tokens_seen": 629319328, + "step": 6490 + }, + { + "epoch": 0.634532655455612, + "loss": 0.0738978236913681, + "loss_ce": 0.006682860665023327, + "loss_iou": 0.19921875, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 629319328, + "step": 6490 + }, + { + "epoch": 0.6346304262807978, + "grad_norm": 3.9564689525769503, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 629417856, + "step": 6491 + }, + { + "epoch": 0.6346304262807978, + "loss": 0.08339785039424896, + "loss_ce": 0.003548611421138048, + "loss_iou": 0.33203125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 629417856, + "step": 6491 + }, + { + "epoch": 0.6347281971059836, + "grad_norm": 3.2332553318641475, + "learning_rate": 5e-05, + "loss": 0.1051, + "num_input_tokens_seen": 629514720, + "step": 6492 + }, + { + "epoch": 0.6347281971059836, + "loss": 0.07458564639091492, + "loss_ce": 0.006684032268822193, + "loss_iou": 0.451171875, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 629514720, + "step": 6492 + }, + { + "epoch": 0.6348259679311693, + "grad_norm": 4.180824400253508, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 629612280, + "step": 6493 + }, + { + "epoch": 0.6348259679311693, + "loss": 0.060094207525253296, + "loss_ce": 0.006978358142077923, + "loss_iou": 0.310546875, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 629612280, + "step": 6493 + }, + { + "epoch": 0.6349237387563551, + "grad_norm": 7.13318101099545, + "learning_rate": 5e-05, + "loss": 0.1113, + "num_input_tokens_seen": 629709044, + "step": 6494 + }, + { + "epoch": 0.6349237387563551, + "loss": 0.08839826285839081, + "loss_ce": 0.007587720640003681, + "loss_iou": 0.302734375, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 629709044, + "step": 6494 + }, + { + "epoch": 0.6350215095815409, + "grad_norm": 29.833739918277953, + "learning_rate": 5e-05, + "loss": 0.1119, + "num_input_tokens_seen": 629806636, + "step": 6495 + }, + { + "epoch": 0.6350215095815409, + "loss": 0.15105684101581573, + "loss_ce": 0.006220403127372265, + "loss_iou": 0.263671875, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 629806636, + "step": 6495 + }, + { + "epoch": 0.6351192804067266, + "grad_norm": 15.122884135868293, + "learning_rate": 5e-05, + "loss": 0.0501, + "num_input_tokens_seen": 629903552, + "step": 6496 + }, + { + "epoch": 0.6351192804067266, + "loss": 0.03608560934662819, + "loss_ce": 0.005255224648863077, + "loss_iou": 0.30859375, + "loss_num": 0.00616455078125, + "loss_xval": 0.0308837890625, + "num_input_tokens_seen": 629903552, + "step": 6496 + }, + { + "epoch": 0.6352170512319124, + "grad_norm": 12.456900243255864, + "learning_rate": 5e-05, + "loss": 0.0754, + "num_input_tokens_seen": 630000576, + "step": 6497 + }, + { + "epoch": 0.6352170512319124, + "loss": 0.07961055636405945, + "loss_ce": 0.004262648057192564, + "loss_iou": 0.353515625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 630000576, + "step": 6497 + }, + { + "epoch": 0.6353148220570982, + "grad_norm": 16.053640072078764, + "learning_rate": 5e-05, + "loss": 0.0847, + "num_input_tokens_seen": 630097404, + "step": 6498 + }, + { + "epoch": 0.6353148220570982, + "loss": 0.09333956241607666, + "loss_ce": 0.005044576711952686, + "loss_iou": 0.326171875, + "loss_num": 0.017578125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 630097404, + "step": 6498 + }, + { + "epoch": 0.6354125928822839, + "grad_norm": 3.416363919146848, + "learning_rate": 5e-05, + "loss": 0.0787, + "num_input_tokens_seen": 630193848, + "step": 6499 + }, + { + "epoch": 0.6354125928822839, + "loss": 0.06615455448627472, + "loss_ce": 0.003906321711838245, + "loss_iou": 0.3046875, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 630193848, + "step": 6499 + }, + { + "epoch": 0.6355103637074697, + "grad_norm": 6.765944148209925, + "learning_rate": 5e-05, + "loss": 0.1007, + "num_input_tokens_seen": 630290912, + "step": 6500 + }, + { + "epoch": 0.6355103637074697, + "eval_seeclick_CIoU": 0.5049909949302673, + "eval_seeclick_GIoU": 0.5115578323602676, + "eval_seeclick_IoU": 0.5486220419406891, + "eval_seeclick_MAE_all": 0.07419334724545479, + "eval_seeclick_MAE_h": 0.05006022937595844, + "eval_seeclick_MAE_w": 0.0924823209643364, + "eval_seeclick_MAE_x": 0.1031629741191864, + "eval_seeclick_MAE_y": 0.05106786824762821, + "eval_seeclick_NUM_probability": 0.9999975562095642, + "eval_seeclick_inside_bbox": 0.7840909063816071, + "eval_seeclick_loss": 0.28482094407081604, + "eval_seeclick_loss_ce": 0.009563700761646032, + "eval_seeclick_loss_iou": 0.3763427734375, + "eval_seeclick_loss_num": 0.05464935302734375, + "eval_seeclick_loss_xval": 0.2732391357421875, + "eval_seeclick_runtime": 77.7987, + "eval_seeclick_samples_per_second": 0.553, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 630290912, + "step": 6500 + }, + { + "epoch": 0.6355103637074697, + "eval_icons_CIoU": 0.6898450255393982, + "eval_icons_GIoU": 0.6878740787506104, + "eval_icons_IoU": 0.7202351689338684, + "eval_icons_MAE_all": 0.05832792446017265, + "eval_icons_MAE_h": 0.055618878453969955, + "eval_icons_MAE_w": 0.062069691717624664, + "eval_icons_MAE_x": 0.06225305236876011, + "eval_icons_MAE_y": 0.05337008461356163, + "eval_icons_NUM_probability": 0.9999969601631165, + "eval_icons_inside_bbox": 0.8194444477558136, + "eval_icons_loss": 0.18150492012500763, + "eval_icons_loss_ce": 1.1253067100369663e-06, + "eval_icons_loss_iou": 0.33538818359375, + "eval_icons_loss_num": 0.040630340576171875, + "eval_icons_loss_xval": 0.20307159423828125, + "eval_icons_runtime": 98.0138, + "eval_icons_samples_per_second": 0.51, + "eval_icons_steps_per_second": 0.02, + "num_input_tokens_seen": 630290912, + "step": 6500 + }, + { + "epoch": 0.6355103637074697, + "eval_screenspot_CIoU": 0.2990948458512624, + "eval_screenspot_GIoU": 0.2852509468793869, + "eval_screenspot_IoU": 0.40183932582537335, + "eval_screenspot_MAE_all": 0.17105235159397125, + "eval_screenspot_MAE_h": 0.1340156098206838, + "eval_screenspot_MAE_w": 0.22254822651545206, + "eval_screenspot_MAE_x": 0.20677055915196738, + "eval_screenspot_MAE_y": 0.12087502330541611, + "eval_screenspot_NUM_probability": 0.9999963243802389, + "eval_screenspot_inside_bbox": 0.6612499952316284, + "eval_screenspot_loss": 0.5911309719085693, + "eval_screenspot_loss_ce": 0.015428178167591492, + "eval_screenspot_loss_iou": 0.3406982421875, + "eval_screenspot_loss_num": 0.11702473958333333, + "eval_screenspot_loss_xval": 0.5850830078125, + "eval_screenspot_runtime": 162.471, + "eval_screenspot_samples_per_second": 0.548, + "eval_screenspot_steps_per_second": 0.018, + "num_input_tokens_seen": 630290912, + "step": 6500 + }, + { + "epoch": 0.6355103637074697, + "eval_compot_CIoU": 0.4898149073123932, + "eval_compot_GIoU": 0.4764658957719803, + "eval_compot_IoU": 0.552081972360611, + "eval_compot_MAE_all": 0.09602932259440422, + "eval_compot_MAE_h": 0.07965122163295746, + "eval_compot_MAE_w": 0.11215659230947495, + "eval_compot_MAE_x": 0.10947460308670998, + "eval_compot_MAE_y": 0.0828348807990551, + "eval_compot_NUM_probability": 0.9999919533729553, + "eval_compot_inside_bbox": 0.7638888955116272, + "eval_compot_loss": 0.2993195652961731, + "eval_compot_loss_ce": 0.012317383661866188, + "eval_compot_loss_iou": 0.4140625, + "eval_compot_loss_num": 0.05297088623046875, + "eval_compot_loss_xval": 0.264739990234375, + "eval_compot_runtime": 88.3454, + "eval_compot_samples_per_second": 0.566, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 630290912, + "step": 6500 + }, + { + "epoch": 0.6355103637074697, + "loss": 0.24142618477344513, + "loss_ce": 0.01278848759829998, + "loss_iou": 0.4296875, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 630290912, + "step": 6500 + }, + { + "epoch": 0.6356081345326554, + "grad_norm": 2.4754433104846125, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 630387356, + "step": 6501 + }, + { + "epoch": 0.6356081345326554, + "loss": 0.04165951907634735, + "loss_ce": 0.006419347133487463, + "loss_iou": 0.26171875, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 630387356, + "step": 6501 + }, + { + "epoch": 0.6357059053578412, + "grad_norm": 3.698295879029521, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 630484752, + "step": 6502 + }, + { + "epoch": 0.6357059053578412, + "loss": 0.08785524219274521, + "loss_ce": 0.007258320227265358, + "loss_iou": 0.30859375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 630484752, + "step": 6502 + }, + { + "epoch": 0.635803676183027, + "grad_norm": 6.559245545823897, + "learning_rate": 5e-05, + "loss": 0.0542, + "num_input_tokens_seen": 630581952, + "step": 6503 + }, + { + "epoch": 0.635803676183027, + "loss": 0.056909170001745224, + "loss_ce": 0.0018554616253823042, + "loss_iou": 0.427734375, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 630581952, + "step": 6503 + }, + { + "epoch": 0.6359014470082127, + "grad_norm": 8.573942481787036, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 630678748, + "step": 6504 + }, + { + "epoch": 0.6359014470082127, + "loss": 0.07673798501491547, + "loss_ce": 0.0032058777287602425, + "loss_iou": 0.224609375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 630678748, + "step": 6504 + }, + { + "epoch": 0.6359992178333985, + "grad_norm": 5.57535508953917, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 630776168, + "step": 6505 + }, + { + "epoch": 0.6359992178333985, + "loss": 0.06246631592512131, + "loss_ce": 0.0033995441626757383, + "loss_iou": 0.31640625, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 630776168, + "step": 6505 + }, + { + "epoch": 0.6360969886585843, + "grad_norm": 5.7223904877577905, + "learning_rate": 5e-05, + "loss": 0.0867, + "num_input_tokens_seen": 630873936, + "step": 6506 + }, + { + "epoch": 0.6360969886585843, + "loss": 0.09753024578094482, + "loss_ce": 0.004573700483888388, + "loss_iou": 0.404296875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 630873936, + "step": 6506 + }, + { + "epoch": 0.63619475948377, + "grad_norm": 5.363580107986873, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 630970620, + "step": 6507 + }, + { + "epoch": 0.63619475948377, + "loss": 0.05961993709206581, + "loss_ce": 0.0039635030552744865, + "loss_iou": 0.2734375, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 630970620, + "step": 6507 + }, + { + "epoch": 0.6362925303089558, + "grad_norm": 4.580978840907715, + "learning_rate": 5e-05, + "loss": 0.0946, + "num_input_tokens_seen": 631066708, + "step": 6508 + }, + { + "epoch": 0.6362925303089558, + "loss": 0.11219849437475204, + "loss_ce": 0.004006057046353817, + "loss_iou": 0.154296875, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 631066708, + "step": 6508 + }, + { + "epoch": 0.6363903011341415, + "grad_norm": 16.820799804739174, + "learning_rate": 5e-05, + "loss": 0.108, + "num_input_tokens_seen": 631163868, + "step": 6509 + }, + { + "epoch": 0.6363903011341415, + "loss": 0.11415868252515793, + "loss_ce": 0.005516096018254757, + "loss_iou": 0.30078125, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 631163868, + "step": 6509 + }, + { + "epoch": 0.6364880719593273, + "grad_norm": 10.965231708205724, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 631260432, + "step": 6510 + }, + { + "epoch": 0.6364880719593273, + "loss": 0.0660022422671318, + "loss_ce": 0.004021037835627794, + "loss_iou": 0.28515625, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 631260432, + "step": 6510 + }, + { + "epoch": 0.6365858427845131, + "grad_norm": 4.196618863896971, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 631357680, + "step": 6511 + }, + { + "epoch": 0.6365858427845131, + "loss": 0.08258656412363052, + "loss_ce": 0.009016310796141624, + "loss_iou": 0.296875, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 631357680, + "step": 6511 + }, + { + "epoch": 0.6366836136096988, + "grad_norm": 9.074248373999815, + "learning_rate": 5e-05, + "loss": 0.0932, + "num_input_tokens_seen": 631453948, + "step": 6512 + }, + { + "epoch": 0.6366836136096988, + "loss": 0.10650216042995453, + "loss_ce": 0.003261192701756954, + "loss_iou": 0.29296875, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 631453948, + "step": 6512 + }, + { + "epoch": 0.6367813844348846, + "grad_norm": 5.629042876995223, + "learning_rate": 5e-05, + "loss": 0.0856, + "num_input_tokens_seen": 631551276, + "step": 6513 + }, + { + "epoch": 0.6367813844348846, + "loss": 0.0890393853187561, + "loss_ce": 0.004505693446844816, + "loss_iou": 0.38671875, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 631551276, + "step": 6513 + }, + { + "epoch": 0.6368791552600704, + "grad_norm": 7.412306494115489, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 631649168, + "step": 6514 + }, + { + "epoch": 0.6368791552600704, + "loss": 0.1150108203291893, + "loss_ce": 0.004018386360257864, + "loss_iou": 0.365234375, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 631649168, + "step": 6514 + }, + { + "epoch": 0.6369769260852561, + "grad_norm": 15.212599406990346, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 631746852, + "step": 6515 + }, + { + "epoch": 0.6369769260852561, + "loss": 0.0796031653881073, + "loss_ce": 0.005308124236762524, + "loss_iou": 0.33203125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 631746852, + "step": 6515 + }, + { + "epoch": 0.6370746969104419, + "grad_norm": 12.015583211526451, + "learning_rate": 5e-05, + "loss": 0.0821, + "num_input_tokens_seen": 631843252, + "step": 6516 + }, + { + "epoch": 0.6370746969104419, + "loss": 0.06489194184541702, + "loss_ce": 0.006313452031463385, + "loss_iou": 0.310546875, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 631843252, + "step": 6516 + }, + { + "epoch": 0.6371724677356276, + "grad_norm": 5.5798617818626415, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 631939808, + "step": 6517 + }, + { + "epoch": 0.6371724677356276, + "loss": 0.06368228793144226, + "loss_ce": 0.005256380420178175, + "loss_iou": 0.224609375, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 631939808, + "step": 6517 + }, + { + "epoch": 0.6372702385608134, + "grad_norm": 13.138191956932394, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 632037236, + "step": 6518 + }, + { + "epoch": 0.6372702385608134, + "loss": 0.07879240065813065, + "loss_ce": 0.007228681817650795, + "loss_iou": 0.275390625, + "loss_num": 0.0142822265625, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 632037236, + "step": 6518 + }, + { + "epoch": 0.6373680093859992, + "grad_norm": 4.737495137606068, + "learning_rate": 5e-05, + "loss": 0.1048, + "num_input_tokens_seen": 632133412, + "step": 6519 + }, + { + "epoch": 0.6373680093859992, + "loss": 0.13771986961364746, + "loss_ce": 0.0063264318741858006, + "loss_iou": 0.228515625, + "loss_num": 0.0262451171875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 632133412, + "step": 6519 + }, + { + "epoch": 0.6374657802111849, + "grad_norm": 4.8903698780108105, + "learning_rate": 5e-05, + "loss": 0.1111, + "num_input_tokens_seen": 632229812, + "step": 6520 + }, + { + "epoch": 0.6374657802111849, + "loss": 0.10375230014324188, + "loss_ce": 0.006508040241897106, + "loss_iou": 0.24609375, + "loss_num": 0.01953125, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 632229812, + "step": 6520 + }, + { + "epoch": 0.6375635510363707, + "grad_norm": 22.66696949298709, + "learning_rate": 5e-05, + "loss": 0.1115, + "num_input_tokens_seen": 632326784, + "step": 6521 + }, + { + "epoch": 0.6375635510363707, + "loss": 0.1255413293838501, + "loss_ce": 0.0034786479081958532, + "loss_iou": 0.3046875, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 632326784, + "step": 6521 + }, + { + "epoch": 0.6376613218615566, + "grad_norm": 28.68255288001345, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 632422688, + "step": 6522 + }, + { + "epoch": 0.6376613218615566, + "loss": 0.043381430208683014, + "loss_ce": 0.0039679789915680885, + "loss_iou": 0.236328125, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 632422688, + "step": 6522 + }, + { + "epoch": 0.6377590926867422, + "grad_norm": 3.220254187719122, + "learning_rate": 5e-05, + "loss": 0.0503, + "num_input_tokens_seen": 632518232, + "step": 6523 + }, + { + "epoch": 0.6377590926867422, + "loss": 0.032743558287620544, + "loss_ce": 0.0035000904463231564, + "loss_iou": 0.259765625, + "loss_num": 0.005859375, + "loss_xval": 0.029296875, + "num_input_tokens_seen": 632518232, + "step": 6523 + }, + { + "epoch": 0.637856863511928, + "grad_norm": 3.3227863560123647, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 632615216, + "step": 6524 + }, + { + "epoch": 0.637856863511928, + "loss": 0.0661308765411377, + "loss_ce": 0.007033584639430046, + "loss_iou": 0.259765625, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 632615216, + "step": 6524 + }, + { + "epoch": 0.6379546343371139, + "grad_norm": 10.02238336413336, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 632711944, + "step": 6525 + }, + { + "epoch": 0.6379546343371139, + "loss": 0.10198608040809631, + "loss_ce": 0.009178310632705688, + "loss_iou": 0.2578125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 632711944, + "step": 6525 + }, + { + "epoch": 0.6380524051622996, + "grad_norm": 9.770060714035242, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 632808996, + "step": 6526 + }, + { + "epoch": 0.6380524051622996, + "loss": 0.06406454741954803, + "loss_ce": 0.005356354173272848, + "loss_iou": 0.302734375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 632808996, + "step": 6526 + }, + { + "epoch": 0.6381501759874854, + "grad_norm": 8.20486239799498, + "learning_rate": 5e-05, + "loss": 0.045, + "num_input_tokens_seen": 632906132, + "step": 6527 + }, + { + "epoch": 0.6381501759874854, + "loss": 0.03977210074663162, + "loss_ce": 0.003669807920232415, + "loss_iou": 0.32421875, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 632906132, + "step": 6527 + }, + { + "epoch": 0.638247946812671, + "grad_norm": 45.058115264135346, + "learning_rate": 5e-05, + "loss": 0.0912, + "num_input_tokens_seen": 633002948, + "step": 6528 + }, + { + "epoch": 0.638247946812671, + "loss": 0.07948718219995499, + "loss_ce": 0.00672564934939146, + "loss_iou": 0.326171875, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 633002948, + "step": 6528 + }, + { + "epoch": 0.6383457176378569, + "grad_norm": 7.995599983292602, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 633100580, + "step": 6529 + }, + { + "epoch": 0.6383457176378569, + "loss": 0.11077114939689636, + "loss_ce": 0.01198575273156166, + "loss_iou": 0.30859375, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 633100580, + "step": 6529 + }, + { + "epoch": 0.6384434884630427, + "grad_norm": 7.628778378718538, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 633197704, + "step": 6530 + }, + { + "epoch": 0.6384434884630427, + "loss": 0.08326761424541473, + "loss_ce": 0.006622721441090107, + "loss_iou": 0.275390625, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 633197704, + "step": 6530 + }, + { + "epoch": 0.6385412592882284, + "grad_norm": 5.701396075388719, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 633293832, + "step": 6531 + }, + { + "epoch": 0.6385412592882284, + "loss": 0.07540004700422287, + "loss_ce": 0.00846736878156662, + "loss_iou": 0.173828125, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 633293832, + "step": 6531 + }, + { + "epoch": 0.6386390301134142, + "grad_norm": 3.6346626900808676, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 633389900, + "step": 6532 + }, + { + "epoch": 0.6386390301134142, + "loss": 0.09707770496606827, + "loss_ce": 0.005988448392599821, + "loss_iou": 0.1162109375, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 633389900, + "step": 6532 + }, + { + "epoch": 0.6387368009386, + "grad_norm": 2.9161748208331697, + "learning_rate": 5e-05, + "loss": 0.0513, + "num_input_tokens_seen": 633486780, + "step": 6533 + }, + { + "epoch": 0.6387368009386, + "loss": 0.052866701036691666, + "loss_ce": 0.0027110595256090164, + "loss_iou": 0.2578125, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 633486780, + "step": 6533 + }, + { + "epoch": 0.6388345717637857, + "grad_norm": 6.783665349351995, + "learning_rate": 5e-05, + "loss": 0.0525, + "num_input_tokens_seen": 633584248, + "step": 6534 + }, + { + "epoch": 0.6388345717637857, + "loss": 0.03577505052089691, + "loss_ce": 0.0019577587954699993, + "loss_iou": 0.2216796875, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 633584248, + "step": 6534 + }, + { + "epoch": 0.6389323425889715, + "grad_norm": 3.0488189753069492, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 633681176, + "step": 6535 + }, + { + "epoch": 0.6389323425889715, + "loss": 0.0913034975528717, + "loss_ce": 0.007639553397893906, + "loss_iou": 0.255859375, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 633681176, + "step": 6535 + }, + { + "epoch": 0.6390301134141572, + "grad_norm": 6.3485778365117564, + "learning_rate": 5e-05, + "loss": 0.0391, + "num_input_tokens_seen": 633778104, + "step": 6536 + }, + { + "epoch": 0.6390301134141572, + "loss": 0.02850150316953659, + "loss_ce": 0.004369728732854128, + "loss_iou": 0.2392578125, + "loss_num": 0.00482177734375, + "loss_xval": 0.024169921875, + "num_input_tokens_seen": 633778104, + "step": 6536 + }, + { + "epoch": 0.639127884239343, + "grad_norm": 8.187407178766655, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 633874388, + "step": 6537 + }, + { + "epoch": 0.639127884239343, + "loss": 0.06485282629728317, + "loss_ce": 0.00338279502466321, + "loss_iou": 0.267578125, + "loss_num": 0.01226806640625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 633874388, + "step": 6537 + }, + { + "epoch": 0.6392256550645288, + "grad_norm": 3.2694800981871697, + "learning_rate": 5e-05, + "loss": 0.0439, + "num_input_tokens_seen": 633971280, + "step": 6538 + }, + { + "epoch": 0.6392256550645288, + "loss": 0.04590460658073425, + "loss_ce": 0.006506410427391529, + "loss_iou": 0.34765625, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 633971280, + "step": 6538 + }, + { + "epoch": 0.6393234258897145, + "grad_norm": 2.8318945398126485, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 634067504, + "step": 6539 + }, + { + "epoch": 0.6393234258897145, + "loss": 0.07667022943496704, + "loss_ce": 0.005457456689327955, + "loss_iou": 0.208984375, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 634067504, + "step": 6539 + }, + { + "epoch": 0.6394211967149003, + "grad_norm": 33.02765769155611, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 634164812, + "step": 6540 + }, + { + "epoch": 0.6394211967149003, + "loss": 0.08041907101869583, + "loss_ce": 0.007604127749800682, + "loss_iou": 0.26953125, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 634164812, + "step": 6540 + }, + { + "epoch": 0.6395189675400861, + "grad_norm": 17.520497473617205, + "learning_rate": 5e-05, + "loss": 0.0974, + "num_input_tokens_seen": 634262364, + "step": 6541 + }, + { + "epoch": 0.6395189675400861, + "loss": 0.10519151389598846, + "loss_ce": 0.006577779538929462, + "loss_iou": 0.2470703125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 634262364, + "step": 6541 + }, + { + "epoch": 0.6396167383652718, + "grad_norm": 4.764320149630838, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 634358456, + "step": 6542 + }, + { + "epoch": 0.6396167383652718, + "loss": 0.1374690681695938, + "loss_ce": 0.0033443067222833633, + "loss_iou": 0.318359375, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 634358456, + "step": 6542 + }, + { + "epoch": 0.6397145091904576, + "grad_norm": 7.036870251035404, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 634454924, + "step": 6543 + }, + { + "epoch": 0.6397145091904576, + "loss": 0.0478285551071167, + "loss_ce": 0.004684326238930225, + "loss_iou": 0.322265625, + "loss_num": 0.00860595703125, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 634454924, + "step": 6543 + }, + { + "epoch": 0.6398122800156434, + "grad_norm": 14.020477156771092, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 634551048, + "step": 6544 + }, + { + "epoch": 0.6398122800156434, + "loss": 0.07011700421571732, + "loss_ce": 0.0054502543061971664, + "loss_iou": 0.3046875, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 634551048, + "step": 6544 + }, + { + "epoch": 0.6399100508408291, + "grad_norm": 16.66251437765191, + "learning_rate": 5e-05, + "loss": 0.0645, + "num_input_tokens_seen": 634648320, + "step": 6545 + }, + { + "epoch": 0.6399100508408291, + "loss": 0.05295403674244881, + "loss_ce": 0.004812556318938732, + "loss_iou": 0.36328125, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 634648320, + "step": 6545 + }, + { + "epoch": 0.6400078216660149, + "grad_norm": 12.928260110839933, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 634745308, + "step": 6546 + }, + { + "epoch": 0.6400078216660149, + "loss": 0.09504526108503342, + "loss_ce": 0.003111055586487055, + "loss_iou": 0.298828125, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 634745308, + "step": 6546 + }, + { + "epoch": 0.6401055924912006, + "grad_norm": 19.333273122355543, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 634842564, + "step": 6547 + }, + { + "epoch": 0.6401055924912006, + "loss": 0.0681808665394783, + "loss_ce": 0.0028885100036859512, + "loss_iou": 0.4453125, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 634842564, + "step": 6547 + }, + { + "epoch": 0.6402033633163864, + "grad_norm": 34.54121425186333, + "learning_rate": 5e-05, + "loss": 0.083, + "num_input_tokens_seen": 634938840, + "step": 6548 + }, + { + "epoch": 0.6402033633163864, + "loss": 0.06381645798683167, + "loss_ce": 0.0028118197806179523, + "loss_iou": 0.3046875, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 634938840, + "step": 6548 + }, + { + "epoch": 0.6403011341415722, + "grad_norm": 7.293466961002095, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 635035892, + "step": 6549 + }, + { + "epoch": 0.6403011341415722, + "loss": 0.09001655131578445, + "loss_ce": 0.006581493653357029, + "loss_iou": 0.248046875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 635035892, + "step": 6549 + }, + { + "epoch": 0.6403989049667579, + "grad_norm": 23.923559759757513, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 635132620, + "step": 6550 + }, + { + "epoch": 0.6403989049667579, + "loss": 0.06974269449710846, + "loss_ce": 0.0010323660681024194, + "loss_iou": 0.255859375, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 635132620, + "step": 6550 + }, + { + "epoch": 0.6404966757919437, + "grad_norm": 11.427107229563036, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 635230060, + "step": 6551 + }, + { + "epoch": 0.6404966757919437, + "loss": 0.07021574676036835, + "loss_ce": 0.004557172302156687, + "loss_iou": 0.318359375, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 635230060, + "step": 6551 + }, + { + "epoch": 0.6405944466171295, + "grad_norm": 3.0610152352999473, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 635326880, + "step": 6552 + }, + { + "epoch": 0.6405944466171295, + "loss": 0.057374272495508194, + "loss_ce": 0.008744511753320694, + "loss_iou": 0.234375, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 635326880, + "step": 6552 + }, + { + "epoch": 0.6406922174423152, + "grad_norm": 2.9582695774288257, + "learning_rate": 5e-05, + "loss": 0.1004, + "num_input_tokens_seen": 635423984, + "step": 6553 + }, + { + "epoch": 0.6406922174423152, + "loss": 0.11508752405643463, + "loss_ce": 0.006048217415809631, + "loss_iou": 0.26953125, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 635423984, + "step": 6553 + }, + { + "epoch": 0.640789988267501, + "grad_norm": 8.90767657297655, + "learning_rate": 5e-05, + "loss": 0.0911, + "num_input_tokens_seen": 635520832, + "step": 6554 + }, + { + "epoch": 0.640789988267501, + "loss": 0.09242415428161621, + "loss_ce": 0.0035645952448248863, + "loss_iou": 0.27734375, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 635520832, + "step": 6554 + }, + { + "epoch": 0.6408877590926867, + "grad_norm": 15.295421540136225, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 635618444, + "step": 6555 + }, + { + "epoch": 0.6408877590926867, + "loss": 0.06728977710008621, + "loss_ce": 0.002775618340820074, + "loss_iou": 0.2734375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 635618444, + "step": 6555 + }, + { + "epoch": 0.6409855299178725, + "grad_norm": 15.398006420151798, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 635714916, + "step": 6556 + }, + { + "epoch": 0.6409855299178725, + "loss": 0.08077948540449142, + "loss_ce": 0.004851755686104298, + "loss_iou": 0.310546875, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 635714916, + "step": 6556 + }, + { + "epoch": 0.6410833007430583, + "grad_norm": 8.391267904174159, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 635812564, + "step": 6557 + }, + { + "epoch": 0.6410833007430583, + "loss": 0.061688363552093506, + "loss_ce": 0.005017221439629793, + "loss_iou": 0.28515625, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 635812564, + "step": 6557 + }, + { + "epoch": 0.641181071568244, + "grad_norm": 11.135720879140731, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 635909124, + "step": 6558 + }, + { + "epoch": 0.641181071568244, + "loss": 0.09689095616340637, + "loss_ce": 0.008817225694656372, + "loss_iou": 0.26953125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 635909124, + "step": 6558 + }, + { + "epoch": 0.6412788423934298, + "grad_norm": 6.483076388935509, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 636005980, + "step": 6559 + }, + { + "epoch": 0.6412788423934298, + "loss": 0.06371697038412094, + "loss_ce": 0.006465993355959654, + "loss_iou": 0.330078125, + "loss_num": 0.011474609375, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 636005980, + "step": 6559 + }, + { + "epoch": 0.6413766132186156, + "grad_norm": 6.729722944843895, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 636103364, + "step": 6560 + }, + { + "epoch": 0.6413766132186156, + "loss": 0.059491004794836044, + "loss_ce": 0.002392617054283619, + "loss_iou": 0.40625, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 636103364, + "step": 6560 + }, + { + "epoch": 0.6414743840438013, + "grad_norm": 19.02992909657464, + "learning_rate": 5e-05, + "loss": 0.1043, + "num_input_tokens_seen": 636199932, + "step": 6561 + }, + { + "epoch": 0.6414743840438013, + "loss": 0.08934730291366577, + "loss_ce": 0.004630506969988346, + "loss_iou": 0.25390625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 636199932, + "step": 6561 + }, + { + "epoch": 0.6415721548689871, + "grad_norm": 10.39389131621616, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 636296832, + "step": 6562 + }, + { + "epoch": 0.6415721548689871, + "loss": 0.06984633207321167, + "loss_ce": 0.00936049036681652, + "loss_iou": 0.31640625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 636296832, + "step": 6562 + }, + { + "epoch": 0.6416699256941728, + "grad_norm": 6.9483421398535254, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 636394024, + "step": 6563 + }, + { + "epoch": 0.6416699256941728, + "loss": 0.06363179534673691, + "loss_ce": 0.0045345015823841095, + "loss_iou": 0.240234375, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 636394024, + "step": 6563 + }, + { + "epoch": 0.6417676965193586, + "grad_norm": 14.072227593462395, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 636491028, + "step": 6564 + }, + { + "epoch": 0.6417676965193586, + "loss": 0.06626430153846741, + "loss_ce": 0.0013076290488243103, + "loss_iou": 0.296875, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 636491028, + "step": 6564 + }, + { + "epoch": 0.6418654673445444, + "grad_norm": 12.775468294179873, + "learning_rate": 5e-05, + "loss": 0.0856, + "num_input_tokens_seen": 636587652, + "step": 6565 + }, + { + "epoch": 0.6418654673445444, + "loss": 0.07081660628318787, + "loss_ce": 0.0013280762359499931, + "loss_iou": 0.30078125, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 636587652, + "step": 6565 + }, + { + "epoch": 0.6419632381697301, + "grad_norm": 5.829416200770784, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 636684592, + "step": 6566 + }, + { + "epoch": 0.6419632381697301, + "loss": 0.057335954159498215, + "loss_ce": 0.007287125568836927, + "loss_iou": 0.228515625, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 636684592, + "step": 6566 + }, + { + "epoch": 0.6420610089949159, + "grad_norm": 5.327425489311739, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 636782316, + "step": 6567 + }, + { + "epoch": 0.6420610089949159, + "loss": 0.09454979747533798, + "loss_ce": 0.005530024878680706, + "loss_iou": 0.291015625, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 636782316, + "step": 6567 + }, + { + "epoch": 0.6421587798201017, + "grad_norm": 12.02904707320416, + "learning_rate": 5e-05, + "loss": 0.0584, + "num_input_tokens_seen": 636879796, + "step": 6568 + }, + { + "epoch": 0.6421587798201017, + "loss": 0.059996411204338074, + "loss_ce": 0.004866407252848148, + "loss_iou": 0.328125, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 636879796, + "step": 6568 + }, + { + "epoch": 0.6422565506452874, + "grad_norm": 15.547742007158348, + "learning_rate": 5e-05, + "loss": 0.0695, + "num_input_tokens_seen": 636977696, + "step": 6569 + }, + { + "epoch": 0.6422565506452874, + "loss": 0.07494764029979706, + "loss_ce": 0.006130505818873644, + "loss_iou": 0.30859375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 636977696, + "step": 6569 + }, + { + "epoch": 0.6423543214704732, + "grad_norm": 14.227163802709425, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 637075568, + "step": 6570 + }, + { + "epoch": 0.6423543214704732, + "loss": 0.11437752842903137, + "loss_ce": 0.004972008988261223, + "loss_iou": 0.357421875, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 637075568, + "step": 6570 + }, + { + "epoch": 0.642452092295659, + "grad_norm": 23.866645936118868, + "learning_rate": 5e-05, + "loss": 0.0615, + "num_input_tokens_seen": 637172396, + "step": 6571 + }, + { + "epoch": 0.642452092295659, + "loss": 0.06404156237840652, + "loss_ce": 0.0036472780629992485, + "loss_iou": 0.37890625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 637172396, + "step": 6571 + }, + { + "epoch": 0.6425498631208447, + "grad_norm": 13.459831319481214, + "learning_rate": 5e-05, + "loss": 0.1024, + "num_input_tokens_seen": 637269968, + "step": 6572 + }, + { + "epoch": 0.6425498631208447, + "loss": 0.08231314271688461, + "loss_ce": 0.007819737307727337, + "loss_iou": 0.345703125, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 637269968, + "step": 6572 + }, + { + "epoch": 0.6426476339460305, + "grad_norm": 21.686778405685445, + "learning_rate": 5e-05, + "loss": 0.0871, + "num_input_tokens_seen": 637365024, + "step": 6573 + }, + { + "epoch": 0.6426476339460305, + "loss": 0.0795116201043129, + "loss_ce": 0.004407859407365322, + "loss_iou": 0.34765625, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 637365024, + "step": 6573 + }, + { + "epoch": 0.6427454047712162, + "grad_norm": 22.822388178594007, + "learning_rate": 5e-05, + "loss": 0.1164, + "num_input_tokens_seen": 637461456, + "step": 6574 + }, + { + "epoch": 0.6427454047712162, + "loss": 0.1204184740781784, + "loss_ce": 0.001674578059464693, + "loss_iou": 0.2421875, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 637461456, + "step": 6574 + }, + { + "epoch": 0.642843175596402, + "grad_norm": 12.516412758069277, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 637558180, + "step": 6575 + }, + { + "epoch": 0.642843175596402, + "loss": 0.08239170163869858, + "loss_ce": 0.0036716079339385033, + "loss_iou": 0.318359375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 637558180, + "step": 6575 + }, + { + "epoch": 0.6429409464215878, + "grad_norm": 2.509344553222815, + "learning_rate": 5e-05, + "loss": 0.0578, + "num_input_tokens_seen": 637655572, + "step": 6576 + }, + { + "epoch": 0.6429409464215878, + "loss": 0.052827708423137665, + "loss_ce": 0.00420557614415884, + "loss_iou": 0.2333984375, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 637655572, + "step": 6576 + }, + { + "epoch": 0.6430387172467735, + "grad_norm": 12.401695031966828, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 637751668, + "step": 6577 + }, + { + "epoch": 0.6430387172467735, + "loss": 0.0642232596874237, + "loss_ce": 0.012072527781128883, + "loss_iou": 0.1787109375, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 637751668, + "step": 6577 + }, + { + "epoch": 0.6431364880719593, + "grad_norm": 16.447357967475515, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 637847612, + "step": 6578 + }, + { + "epoch": 0.6431364880719593, + "loss": 0.08167798817157745, + "loss_ce": 0.005841808393597603, + "loss_iou": 0.25, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 637847612, + "step": 6578 + }, + { + "epoch": 0.6432342588971451, + "grad_norm": 5.646036888073833, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 637943432, + "step": 6579 + }, + { + "epoch": 0.6432342588971451, + "loss": 0.05578599125146866, + "loss_ce": 0.0075377002358436584, + "loss_iou": 0.25390625, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 637943432, + "step": 6579 + }, + { + "epoch": 0.6433320297223308, + "grad_norm": 12.059991546655205, + "learning_rate": 5e-05, + "loss": 0.1054, + "num_input_tokens_seen": 638040676, + "step": 6580 + }, + { + "epoch": 0.6433320297223308, + "loss": 0.13633590936660767, + "loss_ce": 0.010328838601708412, + "loss_iou": 0.283203125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 638040676, + "step": 6580 + }, + { + "epoch": 0.6434298005475166, + "grad_norm": 26.914485233981708, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 638136448, + "step": 6581 + }, + { + "epoch": 0.6434298005475166, + "loss": 0.06167677789926529, + "loss_ce": 0.005692281760275364, + "loss_iou": 0.369140625, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 638136448, + "step": 6581 + }, + { + "epoch": 0.6435275713727023, + "grad_norm": 4.76458719887757, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 638233052, + "step": 6582 + }, + { + "epoch": 0.6435275713727023, + "loss": 0.08938789367675781, + "loss_ce": 0.003725048154592514, + "loss_iou": 0.310546875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 638233052, + "step": 6582 + }, + { + "epoch": 0.6436253421978881, + "grad_norm": 24.895075723457865, + "learning_rate": 5e-05, + "loss": 0.0843, + "num_input_tokens_seen": 638329528, + "step": 6583 + }, + { + "epoch": 0.6436253421978881, + "loss": 0.08483102172613144, + "loss_ce": 0.0074384394101798534, + "loss_iou": 0.306640625, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 638329528, + "step": 6583 + }, + { + "epoch": 0.643723113023074, + "grad_norm": 18.715012643528134, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 638427100, + "step": 6584 + }, + { + "epoch": 0.643723113023074, + "loss": 0.07373030483722687, + "loss_ce": 0.001891923020593822, + "loss_iou": 0.380859375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 638427100, + "step": 6584 + }, + { + "epoch": 0.6438208838482596, + "grad_norm": 4.158183400083145, + "learning_rate": 5e-05, + "loss": 0.0899, + "num_input_tokens_seen": 638523804, + "step": 6585 + }, + { + "epoch": 0.6438208838482596, + "loss": 0.1036856546998024, + "loss_ce": 0.007356919348239899, + "loss_iou": 0.35546875, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 638523804, + "step": 6585 + }, + { + "epoch": 0.6439186546734454, + "grad_norm": 8.991816819640702, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 638620212, + "step": 6586 + }, + { + "epoch": 0.6439186546734454, + "loss": 0.10523848980665207, + "loss_ce": 0.003340296447277069, + "loss_iou": 0.357421875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 638620212, + "step": 6586 + }, + { + "epoch": 0.6440164254986313, + "grad_norm": 3.084077151040082, + "learning_rate": 5e-05, + "loss": 0.0462, + "num_input_tokens_seen": 638717508, + "step": 6587 + }, + { + "epoch": 0.6440164254986313, + "loss": 0.06022322550415993, + "loss_ce": 0.007059697061777115, + "loss_iou": 0.28515625, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 638717508, + "step": 6587 + }, + { + "epoch": 0.644114196323817, + "grad_norm": 3.6727850247215517, + "learning_rate": 5e-05, + "loss": 0.0492, + "num_input_tokens_seen": 638813988, + "step": 6588 + }, + { + "epoch": 0.644114196323817, + "loss": 0.04909505695104599, + "loss_ce": 0.0054472871124744415, + "loss_iou": 0.2099609375, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 638813988, + "step": 6588 + }, + { + "epoch": 0.6442119671490028, + "grad_norm": 6.366290176481517, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 638911328, + "step": 6589 + }, + { + "epoch": 0.6442119671490028, + "loss": 0.08429673314094543, + "loss_ce": 0.003211529925465584, + "loss_iou": 0.306640625, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 638911328, + "step": 6589 + }, + { + "epoch": 0.6443097379741886, + "grad_norm": 6.383367994904453, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 639008444, + "step": 6590 + }, + { + "epoch": 0.6443097379741886, + "loss": 0.07502491027116776, + "loss_ce": 0.005261723417788744, + "loss_iou": 0.306640625, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 639008444, + "step": 6590 + }, + { + "epoch": 0.6444075087993743, + "grad_norm": 10.110870512520831, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 639105576, + "step": 6591 + }, + { + "epoch": 0.6444075087993743, + "loss": 0.07451896369457245, + "loss_ce": 0.0022991145960986614, + "loss_iou": 0.2236328125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 639105576, + "step": 6591 + }, + { + "epoch": 0.6445052796245601, + "grad_norm": 9.90076429200192, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 639202704, + "step": 6592 + }, + { + "epoch": 0.6445052796245601, + "loss": 0.08724959194660187, + "loss_ce": 0.0061186086386442184, + "loss_iou": 0.2353515625, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 639202704, + "step": 6592 + }, + { + "epoch": 0.6446030504497458, + "grad_norm": 3.8551673778703464, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 639299592, + "step": 6593 + }, + { + "epoch": 0.6446030504497458, + "loss": 0.08335206657648087, + "loss_ce": 0.0011987498728558421, + "loss_iou": 0.2177734375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 639299592, + "step": 6593 + }, + { + "epoch": 0.6447008212749316, + "grad_norm": 20.176989983949436, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 639396132, + "step": 6594 + }, + { + "epoch": 0.6447008212749316, + "loss": 0.06733432412147522, + "loss_ce": 0.005093718878924847, + "loss_iou": 0.330078125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 639396132, + "step": 6594 + }, + { + "epoch": 0.6447985921001174, + "grad_norm": 11.127821988756494, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 639492576, + "step": 6595 + }, + { + "epoch": 0.6447985921001174, + "loss": 0.0839109867811203, + "loss_ce": 0.010489506646990776, + "loss_iou": 0.1904296875, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 639492576, + "step": 6595 + }, + { + "epoch": 0.6448963629253031, + "grad_norm": 2.9016550864923376, + "learning_rate": 5e-05, + "loss": 0.0484, + "num_input_tokens_seen": 639589592, + "step": 6596 + }, + { + "epoch": 0.6448963629253031, + "loss": 0.05012369528412819, + "loss_ce": 0.003965856973081827, + "loss_iou": 0.2392578125, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 639589592, + "step": 6596 + }, + { + "epoch": 0.6449941337504889, + "grad_norm": 8.938452525055082, + "learning_rate": 5e-05, + "loss": 0.062, + "num_input_tokens_seen": 639686552, + "step": 6597 + }, + { + "epoch": 0.6449941337504889, + "loss": 0.04011376202106476, + "loss_ce": 0.004751519300043583, + "loss_iou": 0.259765625, + "loss_num": 0.007080078125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 639686552, + "step": 6597 + }, + { + "epoch": 0.6450919045756747, + "grad_norm": 8.649400164819612, + "learning_rate": 5e-05, + "loss": 0.0919, + "num_input_tokens_seen": 639783464, + "step": 6598 + }, + { + "epoch": 0.6450919045756747, + "loss": 0.11573748290538788, + "loss_ce": 0.01167253777384758, + "loss_iou": 0.2490234375, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 639783464, + "step": 6598 + }, + { + "epoch": 0.6451896754008604, + "grad_norm": 11.443511730017814, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 639880028, + "step": 6599 + }, + { + "epoch": 0.6451896754008604, + "loss": 0.0665171816945076, + "loss_ce": 0.0130656398832798, + "loss_iou": 0.33203125, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 639880028, + "step": 6599 + }, + { + "epoch": 0.6452874462260462, + "grad_norm": 4.181648620713907, + "learning_rate": 5e-05, + "loss": 0.0599, + "num_input_tokens_seen": 639976656, + "step": 6600 + }, + { + "epoch": 0.6452874462260462, + "loss": 0.08225756883621216, + "loss_ce": 0.004376714117825031, + "loss_iou": 0.23828125, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 639976656, + "step": 6600 + }, + { + "epoch": 0.6453852170512319, + "grad_norm": 22.634224642450484, + "learning_rate": 5e-05, + "loss": 0.0838, + "num_input_tokens_seen": 640074628, + "step": 6601 + }, + { + "epoch": 0.6453852170512319, + "loss": 0.08587168902158737, + "loss_ce": 0.006785386707633734, + "loss_iou": 0.294921875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 640074628, + "step": 6601 + }, + { + "epoch": 0.6454829878764177, + "grad_norm": 10.957961891882182, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 640172004, + "step": 6602 + }, + { + "epoch": 0.6454829878764177, + "loss": 0.05516361445188522, + "loss_ce": 0.004962198436260223, + "loss_iou": 0.416015625, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 640172004, + "step": 6602 + }, + { + "epoch": 0.6455807587016035, + "grad_norm": 4.222828365381044, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 640268468, + "step": 6603 + }, + { + "epoch": 0.6455807587016035, + "loss": 0.06989993155002594, + "loss_ce": 0.005553622730076313, + "loss_iou": 0.25, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 640268468, + "step": 6603 + }, + { + "epoch": 0.6456785295267892, + "grad_norm": 10.587281593417789, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 640365460, + "step": 6604 + }, + { + "epoch": 0.6456785295267892, + "loss": 0.08745171129703522, + "loss_ce": 0.006824266631156206, + "loss_iou": 0.37890625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 640365460, + "step": 6604 + }, + { + "epoch": 0.645776300351975, + "grad_norm": 7.917343211094084, + "learning_rate": 5e-05, + "loss": 0.1128, + "num_input_tokens_seen": 640463468, + "step": 6605 + }, + { + "epoch": 0.645776300351975, + "loss": 0.09162614494562149, + "loss_ce": 0.009907694533467293, + "loss_iou": 0.3125, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 640463468, + "step": 6605 + }, + { + "epoch": 0.6458740711771608, + "grad_norm": 11.242186061815303, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 640560396, + "step": 6606 + }, + { + "epoch": 0.6458740711771608, + "loss": 0.0678071528673172, + "loss_ce": 0.004315329249948263, + "loss_iou": 0.244140625, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 640560396, + "step": 6606 + }, + { + "epoch": 0.6459718420023465, + "grad_norm": 40.72215678909668, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 640657852, + "step": 6607 + }, + { + "epoch": 0.6459718420023465, + "loss": 0.05696394294500351, + "loss_ce": 0.00787641666829586, + "loss_iou": 0.32421875, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 640657852, + "step": 6607 + }, + { + "epoch": 0.6460696128275323, + "grad_norm": 28.245814431698708, + "learning_rate": 5e-05, + "loss": 0.0918, + "num_input_tokens_seen": 640754432, + "step": 6608 + }, + { + "epoch": 0.6460696128275323, + "loss": 0.09579037129878998, + "loss_ce": 0.006076319143176079, + "loss_iou": 0.34765625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 640754432, + "step": 6608 + }, + { + "epoch": 0.646167383652718, + "grad_norm": 8.735072853030111, + "learning_rate": 5e-05, + "loss": 0.0613, + "num_input_tokens_seen": 640851432, + "step": 6609 + }, + { + "epoch": 0.646167383652718, + "loss": 0.05902005732059479, + "loss_ce": 0.005354893859475851, + "loss_iou": 0.296875, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 640851432, + "step": 6609 + }, + { + "epoch": 0.6462651544779038, + "grad_norm": 18.034681338371207, + "learning_rate": 5e-05, + "loss": 0.1003, + "num_input_tokens_seen": 640948744, + "step": 6610 + }, + { + "epoch": 0.6462651544779038, + "loss": 0.08840535581111908, + "loss_ce": 0.005672207102179527, + "loss_iou": 0.388671875, + "loss_num": 0.0166015625, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 640948744, + "step": 6610 + }, + { + "epoch": 0.6463629253030896, + "grad_norm": 27.873833314102043, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 641045336, + "step": 6611 + }, + { + "epoch": 0.6463629253030896, + "loss": 0.07654520124197006, + "loss_ce": 0.006568397395312786, + "loss_iou": 0.263671875, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 641045336, + "step": 6611 + }, + { + "epoch": 0.6464606961282753, + "grad_norm": 7.398761111924255, + "learning_rate": 5e-05, + "loss": 0.1039, + "num_input_tokens_seen": 641142112, + "step": 6612 + }, + { + "epoch": 0.6464606961282753, + "loss": 0.1062915027141571, + "loss_ce": 0.005964966025203466, + "loss_iou": 0.30078125, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 641142112, + "step": 6612 + }, + { + "epoch": 0.6465584669534611, + "grad_norm": 4.7399134732279355, + "learning_rate": 5e-05, + "loss": 0.0879, + "num_input_tokens_seen": 641238748, + "step": 6613 + }, + { + "epoch": 0.6465584669534611, + "loss": 0.05634249374270439, + "loss_ce": 0.002852806355804205, + "loss_iou": 0.2177734375, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 641238748, + "step": 6613 + }, + { + "epoch": 0.6466562377786469, + "grad_norm": 7.356145096167945, + "learning_rate": 5e-05, + "loss": 0.0575, + "num_input_tokens_seen": 641335128, + "step": 6614 + }, + { + "epoch": 0.6466562377786469, + "loss": 0.05789108946919441, + "loss_ce": 0.0051185693591833115, + "loss_iou": 0.212890625, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 641335128, + "step": 6614 + }, + { + "epoch": 0.6467540086038326, + "grad_norm": 4.005363245841481, + "learning_rate": 5e-05, + "loss": 0.0908, + "num_input_tokens_seen": 641432148, + "step": 6615 + }, + { + "epoch": 0.6467540086038326, + "loss": 0.08049016445875168, + "loss_ce": 0.0030975867994129658, + "loss_iou": 0.2421875, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 641432148, + "step": 6615 + }, + { + "epoch": 0.6468517794290184, + "grad_norm": 2.9221838575453125, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 641527756, + "step": 6616 + }, + { + "epoch": 0.6468517794290184, + "loss": 0.05393531918525696, + "loss_ce": 0.004802017472684383, + "loss_iou": 0.19921875, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 641527756, + "step": 6616 + }, + { + "epoch": 0.6469495502542042, + "grad_norm": 8.461105100917727, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 641624992, + "step": 6617 + }, + { + "epoch": 0.6469495502542042, + "loss": 0.07144369184970856, + "loss_ce": 0.004137171432375908, + "loss_iou": 0.345703125, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 641624992, + "step": 6617 + }, + { + "epoch": 0.6470473210793899, + "grad_norm": 12.13257996179465, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 641721380, + "step": 6618 + }, + { + "epoch": 0.6470473210793899, + "loss": 0.09833397716283798, + "loss_ce": 0.005453726276755333, + "loss_iou": 0.306640625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 641721380, + "step": 6618 + }, + { + "epoch": 0.6471450919045757, + "grad_norm": 15.265009983780413, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 641818392, + "step": 6619 + }, + { + "epoch": 0.6471450919045757, + "loss": 0.06401018798351288, + "loss_ce": 0.006164115853607655, + "loss_iou": 0.271484375, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 641818392, + "step": 6619 + }, + { + "epoch": 0.6472428627297614, + "grad_norm": 8.795035873783696, + "learning_rate": 5e-05, + "loss": 0.0588, + "num_input_tokens_seen": 641915008, + "step": 6620 + }, + { + "epoch": 0.6472428627297614, + "loss": 0.07465862482786179, + "loss_ce": 0.003979912027716637, + "loss_iou": 0.349609375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 641915008, + "step": 6620 + }, + { + "epoch": 0.6473406335549472, + "grad_norm": 3.874174549210776, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 642012072, + "step": 6621 + }, + { + "epoch": 0.6473406335549472, + "loss": 0.056736551225185394, + "loss_ce": 0.0011525998124852777, + "loss_iou": 0.28125, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 642012072, + "step": 6621 + }, + { + "epoch": 0.647438404380133, + "grad_norm": 10.433163192255996, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 642109284, + "step": 6622 + }, + { + "epoch": 0.647438404380133, + "loss": 0.08526210486888885, + "loss_ce": 0.007579612545669079, + "loss_iou": 0.240234375, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 642109284, + "step": 6622 + }, + { + "epoch": 0.6475361752053187, + "grad_norm": 10.604735101323326, + "learning_rate": 5e-05, + "loss": 0.0812, + "num_input_tokens_seen": 642206328, + "step": 6623 + }, + { + "epoch": 0.6475361752053187, + "loss": 0.10381560772657394, + "loss_ce": 0.008127741515636444, + "loss_iou": 0.353515625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 642206328, + "step": 6623 + }, + { + "epoch": 0.6476339460305045, + "grad_norm": 14.612287565927986, + "learning_rate": 5e-05, + "loss": 0.1021, + "num_input_tokens_seen": 642303648, + "step": 6624 + }, + { + "epoch": 0.6476339460305045, + "loss": 0.12073734402656555, + "loss_ce": 0.009210856631398201, + "loss_iou": 0.34765625, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 642303648, + "step": 6624 + }, + { + "epoch": 0.6477317168556903, + "grad_norm": 30.484880157193484, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 642398760, + "step": 6625 + }, + { + "epoch": 0.6477317168556903, + "loss": 0.10519964247941971, + "loss_ce": 0.005010435823351145, + "loss_iou": 0.345703125, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 642398760, + "step": 6625 + }, + { + "epoch": 0.647829487680876, + "grad_norm": 18.404603940844034, + "learning_rate": 5e-05, + "loss": 0.0975, + "num_input_tokens_seen": 642495212, + "step": 6626 + }, + { + "epoch": 0.647829487680876, + "loss": 0.09650832414627075, + "loss_ce": 0.005436241626739502, + "loss_iou": 0.26171875, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 642495212, + "step": 6626 + }, + { + "epoch": 0.6479272585060618, + "grad_norm": 7.152980990312364, + "learning_rate": 5e-05, + "loss": 0.045, + "num_input_tokens_seen": 642591732, + "step": 6627 + }, + { + "epoch": 0.6479272585060618, + "loss": 0.042004942893981934, + "loss_ce": 0.0031408085487782955, + "loss_iou": 0.34765625, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 642591732, + "step": 6627 + }, + { + "epoch": 0.6480250293312475, + "grad_norm": 8.029970154395079, + "learning_rate": 5e-05, + "loss": 0.0449, + "num_input_tokens_seen": 642689300, + "step": 6628 + }, + { + "epoch": 0.6480250293312475, + "loss": 0.043355729430913925, + "loss_ce": 0.00246980506926775, + "loss_iou": 0.27734375, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 642689300, + "step": 6628 + }, + { + "epoch": 0.6481228001564333, + "grad_norm": 11.66126860849068, + "learning_rate": 5e-05, + "loss": 0.0532, + "num_input_tokens_seen": 642785624, + "step": 6629 + }, + { + "epoch": 0.6481228001564333, + "loss": 0.05806975066661835, + "loss_ce": 0.003267812542617321, + "loss_iou": 0.279296875, + "loss_num": 0.010986328125, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 642785624, + "step": 6629 + }, + { + "epoch": 0.6482205709816191, + "grad_norm": 93.17080500708086, + "learning_rate": 5e-05, + "loss": 0.0827, + "num_input_tokens_seen": 642882504, + "step": 6630 + }, + { + "epoch": 0.6482205709816191, + "loss": 0.09282272309064865, + "loss_ce": 0.00496261240914464, + "loss_iou": 0.431640625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 642882504, + "step": 6630 + }, + { + "epoch": 0.6483183418068048, + "grad_norm": 23.06122004569064, + "learning_rate": 5e-05, + "loss": 0.0771, + "num_input_tokens_seen": 642979456, + "step": 6631 + }, + { + "epoch": 0.6483183418068048, + "loss": 0.0634034276008606, + "loss_ce": 0.0023987884633243084, + "loss_iou": 0.3125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 642979456, + "step": 6631 + }, + { + "epoch": 0.6484161126319906, + "grad_norm": 7.402315525812263, + "learning_rate": 5e-05, + "loss": 0.1172, + "num_input_tokens_seen": 643076748, + "step": 6632 + }, + { + "epoch": 0.6484161126319906, + "loss": 0.11831048130989075, + "loss_ce": 0.008065732195973396, + "loss_iou": 0.208984375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 643076748, + "step": 6632 + }, + { + "epoch": 0.6485138834571764, + "grad_norm": 3.0737457916515036, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 643173364, + "step": 6633 + }, + { + "epoch": 0.6485138834571764, + "loss": 0.08189532160758972, + "loss_ce": 0.004853696562349796, + "loss_iou": 0.2275390625, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 643173364, + "step": 6633 + }, + { + "epoch": 0.6486116542823621, + "grad_norm": 3.281180397109956, + "learning_rate": 5e-05, + "loss": 0.0717, + "num_input_tokens_seen": 643270544, + "step": 6634 + }, + { + "epoch": 0.6486116542823621, + "loss": 0.06937664747238159, + "loss_ce": 0.0044810157269239426, + "loss_iou": 0.314453125, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 643270544, + "step": 6634 + }, + { + "epoch": 0.6487094251075479, + "grad_norm": 2.334633034386511, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 643367504, + "step": 6635 + }, + { + "epoch": 0.6487094251075479, + "loss": 0.10561074316501617, + "loss_ce": 0.0035828580148518085, + "loss_iou": 0.2470703125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 643367504, + "step": 6635 + }, + { + "epoch": 0.6488071959327337, + "grad_norm": 3.577975847262744, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 643465716, + "step": 6636 + }, + { + "epoch": 0.6488071959327337, + "loss": 0.06035204604268074, + "loss_ce": 0.005252557806670666, + "loss_iou": 0.259765625, + "loss_num": 0.010986328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 643465716, + "step": 6636 + }, + { + "epoch": 0.6489049667579194, + "grad_norm": 7.703963454084513, + "learning_rate": 5e-05, + "loss": 0.1083, + "num_input_tokens_seen": 643563260, + "step": 6637 + }, + { + "epoch": 0.6489049667579194, + "loss": 0.11969847977161407, + "loss_ce": 0.002785635180771351, + "loss_iou": 0.359375, + "loss_num": 0.0234375, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 643563260, + "step": 6637 + }, + { + "epoch": 0.6490027375831052, + "grad_norm": 7.019738146705351, + "learning_rate": 5e-05, + "loss": 0.0772, + "num_input_tokens_seen": 643659984, + "step": 6638 + }, + { + "epoch": 0.6490027375831052, + "loss": 0.0948939099907875, + "loss_ce": 0.0015024931635707617, + "loss_iou": 0.1904296875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 643659984, + "step": 6638 + }, + { + "epoch": 0.6491005084082909, + "grad_norm": 12.74623493054709, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 643757640, + "step": 6639 + }, + { + "epoch": 0.6491005084082909, + "loss": 0.062010154128074646, + "loss_ce": 0.007177694700658321, + "loss_iou": 0.251953125, + "loss_num": 0.01092529296875, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 643757640, + "step": 6639 + }, + { + "epoch": 0.6491982792334767, + "grad_norm": 6.94367758771205, + "learning_rate": 5e-05, + "loss": 0.0697, + "num_input_tokens_seen": 643855728, + "step": 6640 + }, + { + "epoch": 0.6491982792334767, + "loss": 0.08230899274349213, + "loss_ce": 0.0007736507104709744, + "loss_iou": 0.388671875, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 643855728, + "step": 6640 + }, + { + "epoch": 0.6492960500586625, + "grad_norm": 14.460335269684945, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 643952936, + "step": 6641 + }, + { + "epoch": 0.6492960500586625, + "loss": 0.07426699995994568, + "loss_ce": 0.00859316810965538, + "loss_iou": 0.310546875, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 643952936, + "step": 6641 + }, + { + "epoch": 0.6493938208838482, + "grad_norm": 9.388782003031693, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 644049624, + "step": 6642 + }, + { + "epoch": 0.6493938208838482, + "loss": 0.055615074932575226, + "loss_ce": 0.0037809712812304497, + "loss_iou": 0.212890625, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 644049624, + "step": 6642 + }, + { + "epoch": 0.649491591709034, + "grad_norm": 7.611729084147663, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 644146604, + "step": 6643 + }, + { + "epoch": 0.649491591709034, + "loss": 0.041487179696559906, + "loss_ce": 0.003162823151797056, + "loss_iou": 0.275390625, + "loss_num": 0.007659912109375, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 644146604, + "step": 6643 + }, + { + "epoch": 0.6495893625342198, + "grad_norm": 3.799851252890533, + "learning_rate": 5e-05, + "loss": 0.062, + "num_input_tokens_seen": 644244024, + "step": 6644 + }, + { + "epoch": 0.6495893625342198, + "loss": 0.05748096853494644, + "loss_ce": 0.0015117295552045107, + "loss_iou": 0.390625, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 644244024, + "step": 6644 + }, + { + "epoch": 0.6496871333594055, + "grad_norm": 6.987702535853943, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 644341020, + "step": 6645 + }, + { + "epoch": 0.6496871333594055, + "loss": 0.12297365069389343, + "loss_ce": 0.006320207845419645, + "loss_iou": 0.3046875, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 644341020, + "step": 6645 + }, + { + "epoch": 0.6497849041845913, + "grad_norm": 4.389862501220783, + "learning_rate": 5e-05, + "loss": 0.0507, + "num_input_tokens_seen": 644437816, + "step": 6646 + }, + { + "epoch": 0.6497849041845913, + "loss": 0.04495231434702873, + "loss_ce": 0.007339398376643658, + "loss_iou": 0.25, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 644437816, + "step": 6646 + }, + { + "epoch": 0.649882675009777, + "grad_norm": 5.554292373894543, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 644533752, + "step": 6647 + }, + { + "epoch": 0.649882675009777, + "loss": 0.08267161250114441, + "loss_ce": 0.0042261709459125996, + "loss_iou": 0.259765625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 644533752, + "step": 6647 + }, + { + "epoch": 0.6499804458349628, + "grad_norm": 5.602841150320623, + "learning_rate": 5e-05, + "loss": 0.0446, + "num_input_tokens_seen": 644631152, + "step": 6648 + }, + { + "epoch": 0.6499804458349628, + "loss": 0.042762137949466705, + "loss_ce": 0.00859771203249693, + "loss_iou": 0.3828125, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 644631152, + "step": 6648 + }, + { + "epoch": 0.6500782166601486, + "grad_norm": 5.423113305647475, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 644727592, + "step": 6649 + }, + { + "epoch": 0.6500782166601486, + "loss": 0.055024560540914536, + "loss_ce": 0.0051435804925858974, + "loss_iou": 0.33203125, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 644727592, + "step": 6649 + }, + { + "epoch": 0.6501759874853343, + "grad_norm": 3.0159427516065644, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 644824432, + "step": 6650 + }, + { + "epoch": 0.6501759874853343, + "loss": 0.09107668697834015, + "loss_ce": 0.006787132006138563, + "loss_iou": 0.314453125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 644824432, + "step": 6650 + }, + { + "epoch": 0.6502737583105201, + "grad_norm": 4.615283300153644, + "learning_rate": 5e-05, + "loss": 0.0961, + "num_input_tokens_seen": 644921676, + "step": 6651 + }, + { + "epoch": 0.6502737583105201, + "loss": 0.10076836496591568, + "loss_ce": 0.003154079895466566, + "loss_iou": 0.279296875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 644921676, + "step": 6651 + }, + { + "epoch": 0.650371529135706, + "grad_norm": 14.878643321741876, + "learning_rate": 5e-05, + "loss": 0.0952, + "num_input_tokens_seen": 645018700, + "step": 6652 + }, + { + "epoch": 0.650371529135706, + "loss": 0.12900666892528534, + "loss_ce": 0.006161983590573072, + "loss_iou": 0.232421875, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 645018700, + "step": 6652 + }, + { + "epoch": 0.6504692999608916, + "grad_norm": 23.335574930883688, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 645114692, + "step": 6653 + }, + { + "epoch": 0.6504692999608916, + "loss": 0.12368615716695786, + "loss_ce": 0.00601037684828043, + "loss_iou": 0.216796875, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 645114692, + "step": 6653 + }, + { + "epoch": 0.6505670707860775, + "grad_norm": 8.29531435074572, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 645211472, + "step": 6654 + }, + { + "epoch": 0.6505670707860775, + "loss": 0.10300901532173157, + "loss_ce": 0.009671002626419067, + "loss_iou": 0.375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 645211472, + "step": 6654 + }, + { + "epoch": 0.6506648416112631, + "grad_norm": 13.516249550436461, + "learning_rate": 5e-05, + "loss": 0.0515, + "num_input_tokens_seen": 645307988, + "step": 6655 + }, + { + "epoch": 0.6506648416112631, + "loss": 0.05769806727766991, + "loss_ce": 0.004017645493149757, + "loss_iou": 0.2431640625, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 645307988, + "step": 6655 + }, + { + "epoch": 0.650762612436449, + "grad_norm": 18.386359755561017, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 645404412, + "step": 6656 + }, + { + "epoch": 0.650762612436449, + "loss": 0.04687312990427017, + "loss_ce": 0.004255335777997971, + "loss_iou": 0.25390625, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 645404412, + "step": 6656 + }, + { + "epoch": 0.6508603832616348, + "grad_norm": 28.568747462073095, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 645501316, + "step": 6657 + }, + { + "epoch": 0.6508603832616348, + "loss": 0.061912115663290024, + "loss_ce": 0.0034709544852375984, + "loss_iou": 0.287109375, + "loss_num": 0.01171875, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 645501316, + "step": 6657 + }, + { + "epoch": 0.6509581540868205, + "grad_norm": 16.768331010886076, + "learning_rate": 5e-05, + "loss": 0.1081, + "num_input_tokens_seen": 645598268, + "step": 6658 + }, + { + "epoch": 0.6509581540868205, + "loss": 0.0911099910736084, + "loss_ce": 0.007461302913725376, + "loss_iou": 0.33984375, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 645598268, + "step": 6658 + }, + { + "epoch": 0.6510559249120063, + "grad_norm": 28.051432423900657, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 645694988, + "step": 6659 + }, + { + "epoch": 0.6510559249120063, + "loss": 0.1044699102640152, + "loss_ce": 0.0058371033519506454, + "loss_iou": 0.314453125, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 645694988, + "step": 6659 + }, + { + "epoch": 0.6511536957371921, + "grad_norm": 6.161921810521571, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 645792108, + "step": 6660 + }, + { + "epoch": 0.6511536957371921, + "loss": 0.0659564882516861, + "loss_ce": 0.005318065173923969, + "loss_iou": 0.333984375, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 645792108, + "step": 6660 + }, + { + "epoch": 0.6512514665623778, + "grad_norm": 17.58172160811645, + "learning_rate": 5e-05, + "loss": 0.1064, + "num_input_tokens_seen": 645889052, + "step": 6661 + }, + { + "epoch": 0.6512514665623778, + "loss": 0.1363992840051651, + "loss_ce": 0.005347264930605888, + "loss_iou": 0.294921875, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 645889052, + "step": 6661 + }, + { + "epoch": 0.6513492373875636, + "grad_norm": 21.503026853353933, + "learning_rate": 5e-05, + "loss": 0.1044, + "num_input_tokens_seen": 645986192, + "step": 6662 + }, + { + "epoch": 0.6513492373875636, + "loss": 0.07906907796859741, + "loss_ce": 0.005765859968960285, + "loss_iou": 0.3984375, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 645986192, + "step": 6662 + }, + { + "epoch": 0.6514470082127494, + "grad_norm": 12.261241899654598, + "learning_rate": 5e-05, + "loss": 0.09, + "num_input_tokens_seen": 646083060, + "step": 6663 + }, + { + "epoch": 0.6514470082127494, + "loss": 0.1128549873828888, + "loss_ce": 0.007782968692481518, + "loss_iou": 0.412109375, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 646083060, + "step": 6663 + }, + { + "epoch": 0.6515447790379351, + "grad_norm": 3.74421406028101, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 646179636, + "step": 6664 + }, + { + "epoch": 0.6515447790379351, + "loss": 0.06682048738002777, + "loss_ce": 0.0049308426678180695, + "loss_iou": 0.236328125, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 646179636, + "step": 6664 + }, + { + "epoch": 0.6516425498631209, + "grad_norm": 3.4140701316852273, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 646276780, + "step": 6665 + }, + { + "epoch": 0.6516425498631209, + "loss": 0.061970096081495285, + "loss_ce": 0.0027583688497543335, + "loss_iou": 0.353515625, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 646276780, + "step": 6665 + }, + { + "epoch": 0.6517403206883066, + "grad_norm": 4.213181657774652, + "learning_rate": 5e-05, + "loss": 0.0717, + "num_input_tokens_seen": 646374048, + "step": 6666 + }, + { + "epoch": 0.6517403206883066, + "loss": 0.07459275424480438, + "loss_ce": 0.00334946415387094, + "loss_iou": 0.328125, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 646374048, + "step": 6666 + }, + { + "epoch": 0.6518380915134924, + "grad_norm": 16.182185249414086, + "learning_rate": 5e-05, + "loss": 0.1424, + "num_input_tokens_seen": 646471048, + "step": 6667 + }, + { + "epoch": 0.6518380915134924, + "loss": 0.19955278933048248, + "loss_ce": 0.00712421303614974, + "loss_iou": 0.3203125, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 646471048, + "step": 6667 + }, + { + "epoch": 0.6519358623386782, + "grad_norm": 3.5542065985389297, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 646567444, + "step": 6668 + }, + { + "epoch": 0.6519358623386782, + "loss": 0.08943088352680206, + "loss_ce": 0.0063849245198071, + "loss_iou": 0.2421875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 646567444, + "step": 6668 + }, + { + "epoch": 0.6520336331638639, + "grad_norm": 7.2729527447697295, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 646664452, + "step": 6669 + }, + { + "epoch": 0.6520336331638639, + "loss": 0.07606082409620285, + "loss_ce": 0.0026813072618097067, + "loss_iou": 0.23046875, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 646664452, + "step": 6669 + }, + { + "epoch": 0.6521314039890497, + "grad_norm": 10.788609287724857, + "learning_rate": 5e-05, + "loss": 0.0656, + "num_input_tokens_seen": 646759852, + "step": 6670 + }, + { + "epoch": 0.6521314039890497, + "loss": 0.05453500896692276, + "loss_ce": 0.006401156075298786, + "loss_iou": 0.2119140625, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 646759852, + "step": 6670 + }, + { + "epoch": 0.6522291748142355, + "grad_norm": 11.71990237056299, + "learning_rate": 5e-05, + "loss": 0.0981, + "num_input_tokens_seen": 646856808, + "step": 6671 + }, + { + "epoch": 0.6522291748142355, + "loss": 0.1050589308142662, + "loss_ce": 0.001673007383942604, + "loss_iou": 0.259765625, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 646856808, + "step": 6671 + }, + { + "epoch": 0.6523269456394212, + "grad_norm": 7.668591894889193, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 646953700, + "step": 6672 + }, + { + "epoch": 0.6523269456394212, + "loss": 0.08097680658102036, + "loss_ce": 0.0034468977246433496, + "loss_iou": 0.34765625, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 646953700, + "step": 6672 + }, + { + "epoch": 0.652424716464607, + "grad_norm": 5.0771422139443745, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 647051736, + "step": 6673 + }, + { + "epoch": 0.652424716464607, + "loss": 0.09573596715927124, + "loss_ce": 0.006487308070063591, + "loss_iou": 0.298828125, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 647051736, + "step": 6673 + }, + { + "epoch": 0.6525224872897927, + "grad_norm": 12.776526464293015, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 647148332, + "step": 6674 + }, + { + "epoch": 0.6525224872897927, + "loss": 0.07752884924411774, + "loss_ce": 0.005629436578601599, + "loss_iou": 0.34375, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 647148332, + "step": 6674 + }, + { + "epoch": 0.6526202581149785, + "grad_norm": 6.321812961396829, + "learning_rate": 5e-05, + "loss": 0.0607, + "num_input_tokens_seen": 647245024, + "step": 6675 + }, + { + "epoch": 0.6526202581149785, + "loss": 0.07193288952112198, + "loss_ce": 0.00331411836668849, + "loss_iou": 0.353515625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 647245024, + "step": 6675 + }, + { + "epoch": 0.6527180289401643, + "grad_norm": 7.137405700749762, + "learning_rate": 5e-05, + "loss": 0.105, + "num_input_tokens_seen": 647341452, + "step": 6676 + }, + { + "epoch": 0.6527180289401643, + "loss": 0.07842417061328888, + "loss_ce": 0.0061738016083836555, + "loss_iou": 0.2431640625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 647341452, + "step": 6676 + }, + { + "epoch": 0.65281579976535, + "grad_norm": 13.74785262909925, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 647439040, + "step": 6677 + }, + { + "epoch": 0.65281579976535, + "loss": 0.08880075812339783, + "loss_ce": 0.010126447305083275, + "loss_iou": 0.365234375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 647439040, + "step": 6677 + }, + { + "epoch": 0.6529135705905358, + "grad_norm": 3.8243382403585513, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 647535652, + "step": 6678 + }, + { + "epoch": 0.6529135705905358, + "loss": 0.06333497166633606, + "loss_ce": 0.0039172424003481865, + "loss_iou": 0.310546875, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 647535652, + "step": 6678 + }, + { + "epoch": 0.6530113414157216, + "grad_norm": 5.189117591407626, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 647632808, + "step": 6679 + }, + { + "epoch": 0.6530113414157216, + "loss": 0.06192243844270706, + "loss_ce": 0.008119948208332062, + "loss_iou": 0.255859375, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 647632808, + "step": 6679 + }, + { + "epoch": 0.6531091122409073, + "grad_norm": 2.925578665660134, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 647729364, + "step": 6680 + }, + { + "epoch": 0.6531091122409073, + "loss": 0.06519626080989838, + "loss_ce": 0.008044462651014328, + "loss_iou": 0.310546875, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 647729364, + "step": 6680 + }, + { + "epoch": 0.6532068830660931, + "grad_norm": 16.509642905669253, + "learning_rate": 5e-05, + "loss": 0.0986, + "num_input_tokens_seen": 647827096, + "step": 6681 + }, + { + "epoch": 0.6532068830660931, + "loss": 0.11256164312362671, + "loss_ce": 0.014417117461562157, + "loss_iou": 0.291015625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 647827096, + "step": 6681 + }, + { + "epoch": 0.6533046538912789, + "grad_norm": 6.234123274507821, + "learning_rate": 5e-05, + "loss": 0.0846, + "num_input_tokens_seen": 647922876, + "step": 6682 + }, + { + "epoch": 0.6533046538912789, + "loss": 0.08818015456199646, + "loss_ce": 0.006515120156109333, + "loss_iou": 0.244140625, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 647922876, + "step": 6682 + }, + { + "epoch": 0.6534024247164646, + "grad_norm": 5.091365300458331, + "learning_rate": 5e-05, + "loss": 0.0597, + "num_input_tokens_seen": 648019460, + "step": 6683 + }, + { + "epoch": 0.6534024247164646, + "loss": 0.06352510303258896, + "loss_ce": 0.005343334749341011, + "loss_iou": 0.212890625, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 648019460, + "step": 6683 + }, + { + "epoch": 0.6535001955416504, + "grad_norm": 2.813206560739552, + "learning_rate": 5e-05, + "loss": 0.0621, + "num_input_tokens_seen": 648116440, + "step": 6684 + }, + { + "epoch": 0.6535001955416504, + "loss": 0.07441214472055435, + "loss_ce": 0.006953035946935415, + "loss_iou": 0.263671875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 648116440, + "step": 6684 + }, + { + "epoch": 0.6535979663668361, + "grad_norm": 3.2901046554096776, + "learning_rate": 5e-05, + "loss": 0.1061, + "num_input_tokens_seen": 648213796, + "step": 6685 + }, + { + "epoch": 0.6535979663668361, + "loss": 0.11848006397485733, + "loss_ce": 0.00498519092798233, + "loss_iou": 0.2578125, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 648213796, + "step": 6685 + }, + { + "epoch": 0.6536957371920219, + "grad_norm": 6.957084912296016, + "learning_rate": 5e-05, + "loss": 0.0873, + "num_input_tokens_seen": 648311832, + "step": 6686 + }, + { + "epoch": 0.6536957371920219, + "loss": 0.059755176305770874, + "loss_ce": 0.0032671410590410233, + "loss_iou": 0.400390625, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 648311832, + "step": 6686 + }, + { + "epoch": 0.6537935080172077, + "grad_norm": 5.27410760646591, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 648408916, + "step": 6687 + }, + { + "epoch": 0.6537935080172077, + "loss": 0.07685472071170807, + "loss_ce": 0.0027427878230810165, + "loss_iou": 0.25, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 648408916, + "step": 6687 + }, + { + "epoch": 0.6538912788423934, + "grad_norm": 10.69231547253313, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 648506476, + "step": 6688 + }, + { + "epoch": 0.6538912788423934, + "loss": 0.056074418127536774, + "loss_ce": 0.003225601278245449, + "loss_iou": 0.3203125, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 648506476, + "step": 6688 + }, + { + "epoch": 0.6539890496675792, + "grad_norm": 13.695761344324861, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 648604128, + "step": 6689 + }, + { + "epoch": 0.6539890496675792, + "loss": 0.08220923691987991, + "loss_ce": 0.0024973207619041204, + "loss_iou": 0.349609375, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 648604128, + "step": 6689 + }, + { + "epoch": 0.654086820492765, + "grad_norm": 6.177340649862507, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 648700872, + "step": 6690 + }, + { + "epoch": 0.654086820492765, + "loss": 0.051828496158123016, + "loss_ce": 0.00459491740912199, + "loss_iou": 0.33203125, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 648700872, + "step": 6690 + }, + { + "epoch": 0.6541845913179507, + "grad_norm": 6.018252659518269, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 648798588, + "step": 6691 + }, + { + "epoch": 0.6541845913179507, + "loss": 0.09334137290716171, + "loss_ce": 0.004100348800420761, + "loss_iou": 0.349609375, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 648798588, + "step": 6691 + }, + { + "epoch": 0.6542823621431365, + "grad_norm": 5.695692881420405, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 648895760, + "step": 6692 + }, + { + "epoch": 0.6542823621431365, + "loss": 0.07629373669624329, + "loss_ce": 0.0055616190657019615, + "loss_iou": 0.34375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 648895760, + "step": 6692 + }, + { + "epoch": 0.6543801329683222, + "grad_norm": 6.37433549650106, + "learning_rate": 5e-05, + "loss": 0.0917, + "num_input_tokens_seen": 648992780, + "step": 6693 + }, + { + "epoch": 0.6543801329683222, + "loss": 0.09542396664619446, + "loss_ce": 0.003932265564799309, + "loss_iou": 0.3046875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 648992780, + "step": 6693 + }, + { + "epoch": 0.654477903793508, + "grad_norm": 26.82932398364479, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 649089832, + "step": 6694 + }, + { + "epoch": 0.654477903793508, + "loss": 0.053658612072467804, + "loss_ce": 0.003396160202100873, + "loss_iou": 0.251953125, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 649089832, + "step": 6694 + }, + { + "epoch": 0.6545756746186938, + "grad_norm": 8.870371342317615, + "learning_rate": 5e-05, + "loss": 0.0726, + "num_input_tokens_seen": 649186824, + "step": 6695 + }, + { + "epoch": 0.6545756746186938, + "loss": 0.09188380837440491, + "loss_ce": 0.007258562371134758, + "loss_iou": 0.279296875, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 649186824, + "step": 6695 + }, + { + "epoch": 0.6546734454438795, + "grad_norm": 7.021327013312695, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 649284064, + "step": 6696 + }, + { + "epoch": 0.6546734454438795, + "loss": 0.052732329815626144, + "loss_ce": 0.00201974343508482, + "loss_iou": 0.2412109375, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 649284064, + "step": 6696 + }, + { + "epoch": 0.6547712162690653, + "grad_norm": 2.9989931483873393, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 649380860, + "step": 6697 + }, + { + "epoch": 0.6547712162690653, + "loss": 0.08330881595611572, + "loss_ce": 0.005962010473012924, + "loss_iou": 0.2119140625, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 649380860, + "step": 6697 + }, + { + "epoch": 0.6548689870942511, + "grad_norm": 4.5463967750009235, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 649476980, + "step": 6698 + }, + { + "epoch": 0.6548689870942511, + "loss": 0.06575409322977066, + "loss_ce": 0.005222478415817022, + "loss_iou": 0.29296875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 649476980, + "step": 6698 + }, + { + "epoch": 0.6549667579194368, + "grad_norm": 10.009573315786568, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 649573696, + "step": 6699 + }, + { + "epoch": 0.6549667579194368, + "loss": 0.07687844336032867, + "loss_ce": 0.008824242278933525, + "loss_iou": 0.2431640625, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 649573696, + "step": 6699 + }, + { + "epoch": 0.6550645287446226, + "grad_norm": 14.726608015125432, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 649669868, + "step": 6700 + }, + { + "epoch": 0.6550645287446226, + "loss": 0.09841649979352951, + "loss_ce": 0.00837438739836216, + "loss_iou": 0.26171875, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 649669868, + "step": 6700 + }, + { + "epoch": 0.6551622995698083, + "grad_norm": 9.359054319520313, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 649768148, + "step": 6701 + }, + { + "epoch": 0.6551622995698083, + "loss": 0.07153136283159256, + "loss_ce": 0.0027752600144594908, + "loss_iou": 0.451171875, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 649768148, + "step": 6701 + }, + { + "epoch": 0.6552600703949941, + "grad_norm": 13.821163016394578, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 649865000, + "step": 6702 + }, + { + "epoch": 0.6552600703949941, + "loss": 0.0694495439529419, + "loss_ce": 0.0059424638748168945, + "loss_iou": 0.291015625, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 649865000, + "step": 6702 + }, + { + "epoch": 0.6553578412201799, + "grad_norm": 16.981693385526565, + "learning_rate": 5e-05, + "loss": 0.0548, + "num_input_tokens_seen": 649961736, + "step": 6703 + }, + { + "epoch": 0.6553578412201799, + "loss": 0.06210889667272568, + "loss_ce": 0.004445934668183327, + "loss_iou": 0.2275390625, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 649961736, + "step": 6703 + }, + { + "epoch": 0.6554556120453656, + "grad_norm": 36.6503250251311, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 650058492, + "step": 6704 + }, + { + "epoch": 0.6554556120453656, + "loss": 0.05232216790318489, + "loss_ce": 0.001495140721090138, + "loss_iou": 0.34765625, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 650058492, + "step": 6704 + }, + { + "epoch": 0.6555533828705514, + "grad_norm": 32.413870828415, + "learning_rate": 5e-05, + "loss": 0.0821, + "num_input_tokens_seen": 650154904, + "step": 6705 + }, + { + "epoch": 0.6555533828705514, + "loss": 0.0821816623210907, + "loss_ce": 0.003804896492511034, + "loss_iou": 0.265625, + "loss_num": 0.015625, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 650154904, + "step": 6705 + }, + { + "epoch": 0.6556511536957372, + "grad_norm": 7.97351551693616, + "learning_rate": 5e-05, + "loss": 0.0896, + "num_input_tokens_seen": 650252028, + "step": 6706 + }, + { + "epoch": 0.6556511536957372, + "loss": 0.07881806790828705, + "loss_ce": 0.005438548978418112, + "loss_iou": 0.3046875, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 650252028, + "step": 6706 + }, + { + "epoch": 0.6557489245209229, + "grad_norm": 3.030535645272919, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 650349576, + "step": 6707 + }, + { + "epoch": 0.6557489245209229, + "loss": 0.048746298998594284, + "loss_ce": 0.0033895475789904594, + "loss_iou": 0.34375, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 650349576, + "step": 6707 + }, + { + "epoch": 0.6558466953461087, + "grad_norm": 10.536082724549116, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 650447656, + "step": 6708 + }, + { + "epoch": 0.6558466953461087, + "loss": 0.06258261948823929, + "loss_ce": 0.0011965136509388685, + "loss_iou": 0.32421875, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 650447656, + "step": 6708 + }, + { + "epoch": 0.6559444661712945, + "grad_norm": 15.627337201916115, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 650544764, + "step": 6709 + }, + { + "epoch": 0.6559444661712945, + "loss": 0.071535125374794, + "loss_ce": 0.0032978146336972713, + "loss_iou": 0.28125, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 650544764, + "step": 6709 + }, + { + "epoch": 0.6560422369964802, + "grad_norm": 9.012790346112093, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 650641716, + "step": 6710 + }, + { + "epoch": 0.6560422369964802, + "loss": 0.06871437281370163, + "loss_ce": 0.005520098377019167, + "loss_iou": 0.2109375, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 650641716, + "step": 6710 + }, + { + "epoch": 0.656140007821666, + "grad_norm": 5.217943373014443, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 650739496, + "step": 6711 + }, + { + "epoch": 0.656140007821666, + "loss": 0.05409502238035202, + "loss_ce": 0.0017878960352391005, + "loss_iou": 0.27734375, + "loss_num": 0.010498046875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 650739496, + "step": 6711 + }, + { + "epoch": 0.6562377786468517, + "grad_norm": 15.950797234338074, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 650835984, + "step": 6712 + }, + { + "epoch": 0.6562377786468517, + "loss": 0.05303536355495453, + "loss_ce": 0.0029255026020109653, + "loss_iou": 0.26953125, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 650835984, + "step": 6712 + }, + { + "epoch": 0.6563355494720375, + "grad_norm": 18.463222610002564, + "learning_rate": 5e-05, + "loss": 0.062, + "num_input_tokens_seen": 650933148, + "step": 6713 + }, + { + "epoch": 0.6563355494720375, + "loss": 0.0471968874335289, + "loss_ce": 0.0029235114343464375, + "loss_iou": 0.25390625, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 650933148, + "step": 6713 + }, + { + "epoch": 0.6564333202972233, + "grad_norm": 9.318844885171636, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 651030300, + "step": 6714 + }, + { + "epoch": 0.6564333202972233, + "loss": 0.06159937381744385, + "loss_ce": 0.004226325079798698, + "loss_iou": 0.326171875, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 651030300, + "step": 6714 + }, + { + "epoch": 0.656531091122409, + "grad_norm": 4.1123804928471195, + "learning_rate": 5e-05, + "loss": 0.0572, + "num_input_tokens_seen": 651127456, + "step": 6715 + }, + { + "epoch": 0.656531091122409, + "loss": 0.04621999338269234, + "loss_ce": 0.0028392556123435497, + "loss_iou": 0.37109375, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 651127456, + "step": 6715 + }, + { + "epoch": 0.6566288619475948, + "grad_norm": 6.171402032845546, + "learning_rate": 5e-05, + "loss": 0.0995, + "num_input_tokens_seen": 651224264, + "step": 6716 + }, + { + "epoch": 0.6566288619475948, + "loss": 0.11780419945716858, + "loss_ce": 0.0034548325929790735, + "loss_iou": 0.283203125, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 651224264, + "step": 6716 + }, + { + "epoch": 0.6567266327727806, + "grad_norm": 3.5683467941212044, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 651320720, + "step": 6717 + }, + { + "epoch": 0.6567266327727806, + "loss": 0.05664908140897751, + "loss_ce": 0.004250399302691221, + "loss_iou": 0.330078125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 651320720, + "step": 6717 + }, + { + "epoch": 0.6568244035979663, + "grad_norm": 9.987476920234707, + "learning_rate": 5e-05, + "loss": 0.0951, + "num_input_tokens_seen": 651417844, + "step": 6718 + }, + { + "epoch": 0.6568244035979663, + "loss": 0.10614380985498428, + "loss_ce": 0.0036543388850986958, + "loss_iou": 0.294921875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 651417844, + "step": 6718 + }, + { + "epoch": 0.6569221744231521, + "grad_norm": 100.3360951461558, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 651514688, + "step": 6719 + }, + { + "epoch": 0.6569221744231521, + "loss": 0.06409519165754318, + "loss_ce": 0.0029990007169544697, + "loss_iou": 0.33984375, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 651514688, + "step": 6719 + }, + { + "epoch": 0.6570199452483378, + "grad_norm": 9.969395151742868, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 651612016, + "step": 6720 + }, + { + "epoch": 0.6570199452483378, + "loss": 0.06956758350133896, + "loss_ce": 0.0060376133769750595, + "loss_iou": 0.1904296875, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 651612016, + "step": 6720 + }, + { + "epoch": 0.6571177160735236, + "grad_norm": 8.243030686906387, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 651708252, + "step": 6721 + }, + { + "epoch": 0.6571177160735236, + "loss": 0.08693892508745193, + "loss_ce": 0.007715298794209957, + "loss_iou": 0.359375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 651708252, + "step": 6721 + }, + { + "epoch": 0.6572154868987095, + "grad_norm": 4.251359850036002, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 651804664, + "step": 6722 + }, + { + "epoch": 0.6572154868987095, + "loss": 0.058031924068927765, + "loss_ce": 0.006289368495345116, + "loss_iou": 0.2451171875, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 651804664, + "step": 6722 + }, + { + "epoch": 0.6573132577238952, + "grad_norm": 9.937234347023422, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 651903060, + "step": 6723 + }, + { + "epoch": 0.6573132577238952, + "loss": 0.07652512937784195, + "loss_ce": 0.0185417290776968, + "loss_iou": 0.33203125, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 651903060, + "step": 6723 + }, + { + "epoch": 0.657411028549081, + "grad_norm": 3.828202052043105, + "learning_rate": 5e-05, + "loss": 0.0969, + "num_input_tokens_seen": 652000196, + "step": 6724 + }, + { + "epoch": 0.657411028549081, + "loss": 0.09892325103282928, + "loss_ce": 0.005783607251942158, + "loss_iou": 0.302734375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 652000196, + "step": 6724 + }, + { + "epoch": 0.6575087993742668, + "grad_norm": 11.784438634016418, + "learning_rate": 5e-05, + "loss": 0.0999, + "num_input_tokens_seen": 652096796, + "step": 6725 + }, + { + "epoch": 0.6575087993742668, + "loss": 0.08120661228895187, + "loss_ce": 0.005172068253159523, + "loss_iou": 0.30859375, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 652096796, + "step": 6725 + }, + { + "epoch": 0.6576065701994525, + "grad_norm": 4.113546530086171, + "learning_rate": 5e-05, + "loss": 0.0528, + "num_input_tokens_seen": 652193744, + "step": 6726 + }, + { + "epoch": 0.6576065701994525, + "loss": 0.0735650286078453, + "loss_ce": 0.004839442670345306, + "loss_iou": 0.380859375, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 652193744, + "step": 6726 + }, + { + "epoch": 0.6577043410246383, + "grad_norm": 8.747771573914942, + "learning_rate": 5e-05, + "loss": 0.122, + "num_input_tokens_seen": 652290884, + "step": 6727 + }, + { + "epoch": 0.6577043410246383, + "loss": 0.08900073170661926, + "loss_ce": 0.009410890750586987, + "loss_iou": 0.33203125, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 652290884, + "step": 6727 + }, + { + "epoch": 0.6578021118498241, + "grad_norm": 5.39871590592699, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 652386676, + "step": 6728 + }, + { + "epoch": 0.6578021118498241, + "loss": 0.04779406636953354, + "loss_ce": 0.006931032054126263, + "loss_iou": 0.33984375, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 652386676, + "step": 6728 + }, + { + "epoch": 0.6578998826750098, + "grad_norm": 21.553304369195377, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 652483248, + "step": 6729 + }, + { + "epoch": 0.6578998826750098, + "loss": 0.07688207924365997, + "loss_ce": 0.0032279076986014843, + "loss_iou": 0.359375, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 652483248, + "step": 6729 + }, + { + "epoch": 0.6579976535001956, + "grad_norm": 12.789020107259306, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 652579964, + "step": 6730 + }, + { + "epoch": 0.6579976535001956, + "loss": 0.07215014845132828, + "loss_ce": 0.005621826276183128, + "loss_iou": 0.296875, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 652579964, + "step": 6730 + }, + { + "epoch": 0.6580954243253813, + "grad_norm": 9.821236676639524, + "learning_rate": 5e-05, + "loss": 0.0709, + "num_input_tokens_seen": 652677572, + "step": 6731 + }, + { + "epoch": 0.6580954243253813, + "loss": 0.08526133745908737, + "loss_ce": 0.0075330715626478195, + "loss_iou": 0.373046875, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 652677572, + "step": 6731 + }, + { + "epoch": 0.6581931951505671, + "grad_norm": 5.687179319415823, + "learning_rate": 5e-05, + "loss": 0.0604, + "num_input_tokens_seen": 652775476, + "step": 6732 + }, + { + "epoch": 0.6581931951505671, + "loss": 0.047347232699394226, + "loss_ce": 0.007811707444489002, + "loss_iou": 0.3671875, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 652775476, + "step": 6732 + }, + { + "epoch": 0.6582909659757529, + "grad_norm": 4.515248186943636, + "learning_rate": 5e-05, + "loss": 0.0717, + "num_input_tokens_seen": 652872468, + "step": 6733 + }, + { + "epoch": 0.6582909659757529, + "loss": 0.06012702360749245, + "loss_ce": 0.007121803238987923, + "loss_iou": 0.224609375, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 652872468, + "step": 6733 + }, + { + "epoch": 0.6583887368009386, + "grad_norm": 2.7849766308925283, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 652969920, + "step": 6734 + }, + { + "epoch": 0.6583887368009386, + "loss": 0.11208406090736389, + "loss_ce": 0.007408769801259041, + "loss_iou": 0.322265625, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 652969920, + "step": 6734 + }, + { + "epoch": 0.6584865076261244, + "grad_norm": 4.6054909704836975, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 653066664, + "step": 6735 + }, + { + "epoch": 0.6584865076261244, + "loss": 0.0855560153722763, + "loss_ce": 0.0025329431518912315, + "loss_iou": 0.267578125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 653066664, + "step": 6735 + }, + { + "epoch": 0.6585842784513102, + "grad_norm": 8.38185803294826, + "learning_rate": 5e-05, + "loss": 0.0981, + "num_input_tokens_seen": 653164020, + "step": 6736 + }, + { + "epoch": 0.6585842784513102, + "loss": 0.1052619218826294, + "loss_ce": 0.010062327608466148, + "loss_iou": 0.279296875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 653164020, + "step": 6736 + }, + { + "epoch": 0.6586820492764959, + "grad_norm": 10.356177033335754, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 653260200, + "step": 6737 + }, + { + "epoch": 0.6586820492764959, + "loss": 0.07571512460708618, + "loss_ce": 0.0039225248619914055, + "loss_iou": 0.267578125, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 653260200, + "step": 6737 + }, + { + "epoch": 0.6587798201016817, + "grad_norm": 5.021333346721278, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 653357320, + "step": 6738 + }, + { + "epoch": 0.6587798201016817, + "loss": 0.1277712732553482, + "loss_ce": 0.0038088751025497913, + "loss_iou": 0.2099609375, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 653357320, + "step": 6738 + }, + { + "epoch": 0.6588775909268674, + "grad_norm": 21.563059950510596, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 653453252, + "step": 6739 + }, + { + "epoch": 0.6588775909268674, + "loss": 0.1303037852048874, + "loss_ce": 0.002984441351145506, + "loss_iou": 0.30078125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 653453252, + "step": 6739 + }, + { + "epoch": 0.6589753617520532, + "grad_norm": 24.124929397237864, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 653550232, + "step": 6740 + }, + { + "epoch": 0.6589753617520532, + "loss": 0.053408537060022354, + "loss_ce": 0.0063046542927622795, + "loss_iou": 0.326171875, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 653550232, + "step": 6740 + }, + { + "epoch": 0.659073132577239, + "grad_norm": 5.6352225110293395, + "learning_rate": 5e-05, + "loss": 0.0688, + "num_input_tokens_seen": 653646832, + "step": 6741 + }, + { + "epoch": 0.659073132577239, + "loss": 0.04881530627608299, + "loss_ce": 0.0031991570722311735, + "loss_iou": 0.265625, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 653646832, + "step": 6741 + }, + { + "epoch": 0.6591709034024247, + "grad_norm": 7.841578278287201, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 653744096, + "step": 6742 + }, + { + "epoch": 0.6591709034024247, + "loss": 0.0876045823097229, + "loss_ce": 0.004352631513029337, + "loss_iou": 0.2392578125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 653744096, + "step": 6742 + }, + { + "epoch": 0.6592686742276105, + "grad_norm": 5.511095540002772, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 653841820, + "step": 6743 + }, + { + "epoch": 0.6592686742276105, + "loss": 0.08487053215503693, + "loss_ce": 0.0034038573503494263, + "loss_iou": 0.359375, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 653841820, + "step": 6743 + }, + { + "epoch": 0.6593664450527963, + "grad_norm": 4.7942841795309885, + "learning_rate": 5e-05, + "loss": 0.0444, + "num_input_tokens_seen": 653939268, + "step": 6744 + }, + { + "epoch": 0.6593664450527963, + "loss": 0.05460076779127121, + "loss_ce": 0.004139948170632124, + "loss_iou": 0.31640625, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 653939268, + "step": 6744 + }, + { + "epoch": 0.659464215877982, + "grad_norm": 4.9708083664752785, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 654036132, + "step": 6745 + }, + { + "epoch": 0.659464215877982, + "loss": 0.09893748164176941, + "loss_ce": 0.004821266047656536, + "loss_iou": 0.34765625, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 654036132, + "step": 6745 + }, + { + "epoch": 0.6595619867031678, + "grad_norm": 21.408150903363648, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 654133112, + "step": 6746 + }, + { + "epoch": 0.6595619867031678, + "loss": 0.06499920785427094, + "loss_ce": 0.007298094220459461, + "loss_iou": 0.337890625, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 654133112, + "step": 6746 + }, + { + "epoch": 0.6596597575283535, + "grad_norm": 7.988504953946554, + "learning_rate": 5e-05, + "loss": 0.1211, + "num_input_tokens_seen": 654229580, + "step": 6747 + }, + { + "epoch": 0.6596597575283535, + "loss": 0.10801474750041962, + "loss_ce": 0.0032479031942784786, + "loss_iou": 0.2578125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 654229580, + "step": 6747 + }, + { + "epoch": 0.6597575283535393, + "grad_norm": 13.458761915188962, + "learning_rate": 5e-05, + "loss": 0.101, + "num_input_tokens_seen": 654326348, + "step": 6748 + }, + { + "epoch": 0.6597575283535393, + "loss": 0.09099579602479935, + "loss_ce": 0.002845775568857789, + "loss_iou": 0.412109375, + "loss_num": 0.017578125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 654326348, + "step": 6748 + }, + { + "epoch": 0.6598552991787251, + "grad_norm": 10.830962691013388, + "learning_rate": 5e-05, + "loss": 0.1129, + "num_input_tokens_seen": 654423672, + "step": 6749 + }, + { + "epoch": 0.6598552991787251, + "loss": 0.09123304486274719, + "loss_ce": 0.007580552715808153, + "loss_iou": 0.32421875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 654423672, + "step": 6749 + }, + { + "epoch": 0.6599530700039108, + "grad_norm": 19.777531063892493, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 654520132, + "step": 6750 + }, + { + "epoch": 0.6599530700039108, + "eval_seeclick_CIoU": 0.4331670105457306, + "eval_seeclick_GIoU": 0.43668605387210846, + "eval_seeclick_IoU": 0.5038710087537766, + "eval_seeclick_MAE_all": 0.1059233732521534, + "eval_seeclick_MAE_h": 0.03694998845458031, + "eval_seeclick_MAE_w": 0.16783792525529861, + "eval_seeclick_MAE_x": 0.18004018440842628, + "eval_seeclick_MAE_y": 0.038865381851792336, + "eval_seeclick_NUM_probability": 0.999997466802597, + "eval_seeclick_inside_bbox": 0.7542613744735718, + "eval_seeclick_loss": 0.3174927830696106, + "eval_seeclick_loss_ce": 0.009717344772070646, + "eval_seeclick_loss_iou": 0.39080810546875, + "eval_seeclick_loss_num": 0.057422637939453125, + "eval_seeclick_loss_xval": 0.2870330810546875, + "eval_seeclick_runtime": 75.5432, + "eval_seeclick_samples_per_second": 0.569, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 654520132, + "step": 6750 + }, + { + "epoch": 0.6599530700039108, + "eval_icons_CIoU": 0.6716854870319366, + "eval_icons_GIoU": 0.665808916091919, + "eval_icons_IoU": 0.7099238336086273, + "eval_icons_MAE_all": 0.06897038966417313, + "eval_icons_MAE_h": 0.08314760401844978, + "eval_icons_MAE_w": 0.05619809776544571, + "eval_icons_MAE_x": 0.05663600564002991, + "eval_icons_MAE_y": 0.0798998512327671, + "eval_icons_NUM_probability": 0.9999984204769135, + "eval_icons_inside_bbox": 0.8194444477558136, + "eval_icons_loss": 0.2087375968694687, + "eval_icons_loss_ce": 1.1604736300796503e-05, + "eval_icons_loss_iou": 0.33758544921875, + "eval_icons_loss_num": 0.04594612121582031, + "eval_icons_loss_xval": 0.230010986328125, + "eval_icons_runtime": 99.7735, + "eval_icons_samples_per_second": 0.501, + "eval_icons_steps_per_second": 0.02, + "num_input_tokens_seen": 654520132, + "step": 6750 + }, + { + "epoch": 0.6599530700039108, + "eval_screenspot_CIoU": 0.32679752508799237, + "eval_screenspot_GIoU": 0.30000505844751996, + "eval_screenspot_IoU": 0.40975697835286456, + "eval_screenspot_MAE_all": 0.15770799418290457, + "eval_screenspot_MAE_h": 0.120294488966465, + "eval_screenspot_MAE_w": 0.20796634256839752, + "eval_screenspot_MAE_x": 0.1869368851184845, + "eval_screenspot_MAE_y": 0.11563422034184138, + "eval_screenspot_NUM_probability": 0.9999927679697672, + "eval_screenspot_inside_bbox": 0.6508333285649618, + "eval_screenspot_loss": 0.5693936944007874, + "eval_screenspot_loss_ce": 0.02477827916542689, + "eval_screenspot_loss_iou": 0.351806640625, + "eval_screenspot_loss_num": 0.10935465494791667, + "eval_screenspot_loss_xval": 0.5470377604166666, + "eval_screenspot_runtime": 160.0705, + "eval_screenspot_samples_per_second": 0.556, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 654520132, + "step": 6750 + }, + { + "epoch": 0.6599530700039108, + "eval_compot_CIoU": 0.40489502251148224, + "eval_compot_GIoU": 0.37332338094711304, + "eval_compot_IoU": 0.5016587376594543, + "eval_compot_MAE_all": 0.11684631556272507, + "eval_compot_MAE_h": 0.0913340151309967, + "eval_compot_MAE_w": 0.14288419485092163, + "eval_compot_MAE_x": 0.1407737359404564, + "eval_compot_MAE_y": 0.09239332005381584, + "eval_compot_NUM_probability": 0.9999823570251465, + "eval_compot_inside_bbox": 0.6927083432674408, + "eval_compot_loss": 0.3397844731807709, + "eval_compot_loss_ce": 0.015198822598904371, + "eval_compot_loss_iou": 0.3985595703125, + "eval_compot_loss_num": 0.057952880859375, + "eval_compot_loss_xval": 0.2897491455078125, + "eval_compot_runtime": 91.5901, + "eval_compot_samples_per_second": 0.546, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 654520132, + "step": 6750 + }, + { + "epoch": 0.6599530700039108, + "loss": 0.29596495628356934, + "loss_ce": 0.016423940658569336, + "loss_iou": 0.384765625, + "loss_num": 0.055908203125, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 654520132, + "step": 6750 + }, + { + "epoch": 0.6600508408290966, + "grad_norm": 8.666213197096935, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 654617108, + "step": 6751 + }, + { + "epoch": 0.6600508408290966, + "loss": 0.05958220735192299, + "loss_ce": 0.0029720996972173452, + "loss_iou": 0.375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 654617108, + "step": 6751 + }, + { + "epoch": 0.6601486116542824, + "grad_norm": 17.627214940170226, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 654713376, + "step": 6752 + }, + { + "epoch": 0.6601486116542824, + "loss": 0.059726230800151825, + "loss_ce": 0.003329751081764698, + "loss_iou": 0.27734375, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 654713376, + "step": 6752 + }, + { + "epoch": 0.6602463824794681, + "grad_norm": 8.916268478289107, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 654809780, + "step": 6753 + }, + { + "epoch": 0.6602463824794681, + "loss": 0.06362023949623108, + "loss_ce": 0.0036379448138177395, + "loss_iou": 0.25390625, + "loss_num": 0.011962890625, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 654809780, + "step": 6753 + }, + { + "epoch": 0.6603441533046539, + "grad_norm": 19.074865847734497, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 654906444, + "step": 6754 + }, + { + "epoch": 0.6603441533046539, + "loss": 0.1208992600440979, + "loss_ce": 0.0028648951556533575, + "loss_iou": 0.283203125, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 654906444, + "step": 6754 + }, + { + "epoch": 0.6604419241298397, + "grad_norm": 3.763040052651864, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 655002516, + "step": 6755 + }, + { + "epoch": 0.6604419241298397, + "loss": 0.07846380770206451, + "loss_ce": 0.003215090837329626, + "loss_iou": 0.302734375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 655002516, + "step": 6755 + }, + { + "epoch": 0.6605396949550254, + "grad_norm": 18.04518247368864, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 655100588, + "step": 6756 + }, + { + "epoch": 0.6605396949550254, + "loss": 0.07571005821228027, + "loss_ce": 0.0027577881701290607, + "loss_iou": 0.3046875, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 655100588, + "step": 6756 + }, + { + "epoch": 0.6606374657802112, + "grad_norm": 21.97000158103804, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 655197724, + "step": 6757 + }, + { + "epoch": 0.6606374657802112, + "loss": 0.06863389909267426, + "loss_ce": 0.007003651931881905, + "loss_iou": 0.21875, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 655197724, + "step": 6757 + }, + { + "epoch": 0.6607352366053969, + "grad_norm": 14.57393782197362, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 655294152, + "step": 6758 + }, + { + "epoch": 0.6607352366053969, + "loss": 0.06505471467971802, + "loss_ce": 0.002348724054172635, + "loss_iou": 0.216796875, + "loss_num": 0.0125732421875, + "loss_xval": 0.0625, + "num_input_tokens_seen": 655294152, + "step": 6758 + }, + { + "epoch": 0.6608330074305827, + "grad_norm": 14.93577446765453, + "learning_rate": 5e-05, + "loss": 0.0912, + "num_input_tokens_seen": 655391172, + "step": 6759 + }, + { + "epoch": 0.6608330074305827, + "loss": 0.09610570222139359, + "loss_ce": 0.006666310131549835, + "loss_iou": 0.25390625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 655391172, + "step": 6759 + }, + { + "epoch": 0.6609307782557685, + "grad_norm": 6.603186642060666, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 655487248, + "step": 6760 + }, + { + "epoch": 0.6609307782557685, + "loss": 0.07795462757349014, + "loss_ce": 0.0020364278461784124, + "loss_iou": 0.1640625, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 655487248, + "step": 6760 + }, + { + "epoch": 0.6610285490809542, + "grad_norm": 7.918088735294012, + "learning_rate": 5e-05, + "loss": 0.0506, + "num_input_tokens_seen": 655583368, + "step": 6761 + }, + { + "epoch": 0.6610285490809542, + "loss": 0.05411793291568756, + "loss_ce": 0.004116789437830448, + "loss_iou": 0.25, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 655583368, + "step": 6761 + }, + { + "epoch": 0.66112631990614, + "grad_norm": 35.437862632702675, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 655681052, + "step": 6762 + }, + { + "epoch": 0.66112631990614, + "loss": 0.06968609243631363, + "loss_ce": 0.008643308654427528, + "loss_iou": 0.1796875, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 655681052, + "step": 6762 + }, + { + "epoch": 0.6612240907313258, + "grad_norm": 4.1224246863706195, + "learning_rate": 5e-05, + "loss": 0.0525, + "num_input_tokens_seen": 655777876, + "step": 6763 + }, + { + "epoch": 0.6612240907313258, + "loss": 0.07161629945039749, + "loss_ce": 0.004634028300642967, + "loss_iou": 0.1728515625, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 655777876, + "step": 6763 + }, + { + "epoch": 0.6613218615565115, + "grad_norm": 2.465108822914329, + "learning_rate": 5e-05, + "loss": 0.0701, + "num_input_tokens_seen": 655875456, + "step": 6764 + }, + { + "epoch": 0.6613218615565115, + "loss": 0.048372820019721985, + "loss_ce": 0.005503255408257246, + "loss_iou": 0.279296875, + "loss_num": 0.008544921875, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 655875456, + "step": 6764 + }, + { + "epoch": 0.6614196323816973, + "grad_norm": 4.9793461810471324, + "learning_rate": 5e-05, + "loss": 0.1327, + "num_input_tokens_seen": 655972340, + "step": 6765 + }, + { + "epoch": 0.6614196323816973, + "loss": 0.14753574132919312, + "loss_ce": 0.01004642341285944, + "loss_iou": 0.201171875, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 655972340, + "step": 6765 + }, + { + "epoch": 0.661517403206883, + "grad_norm": 6.0994226494531745, + "learning_rate": 5e-05, + "loss": 0.0867, + "num_input_tokens_seen": 656068716, + "step": 6766 + }, + { + "epoch": 0.661517403206883, + "loss": 0.1304101049900055, + "loss_ce": 0.007805731147527695, + "loss_iou": 0.306640625, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 656068716, + "step": 6766 + }, + { + "epoch": 0.6616151740320688, + "grad_norm": 4.777461007904541, + "learning_rate": 5e-05, + "loss": 0.1227, + "num_input_tokens_seen": 656165468, + "step": 6767 + }, + { + "epoch": 0.6616151740320688, + "loss": 0.08033332228660583, + "loss_ce": 0.009273143485188484, + "loss_iou": 0.25390625, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 656165468, + "step": 6767 + }, + { + "epoch": 0.6617129448572546, + "grad_norm": 6.125116413415576, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 656263552, + "step": 6768 + }, + { + "epoch": 0.6617129448572546, + "loss": 0.0700838640332222, + "loss_ce": 0.006942992098629475, + "loss_iou": 0.40625, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 656263552, + "step": 6768 + }, + { + "epoch": 0.6618107156824403, + "grad_norm": 9.579339873730268, + "learning_rate": 5e-05, + "loss": 0.0933, + "num_input_tokens_seen": 656361032, + "step": 6769 + }, + { + "epoch": 0.6618107156824403, + "loss": 0.07654830068349838, + "loss_ce": 0.006205283105373383, + "loss_iou": 0.4609375, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 656361032, + "step": 6769 + }, + { + "epoch": 0.6619084865076261, + "grad_norm": 2.772036171218457, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 656457776, + "step": 6770 + }, + { + "epoch": 0.6619084865076261, + "loss": 0.05842118710279465, + "loss_ce": 0.005061205476522446, + "loss_iou": 0.19921875, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 656457776, + "step": 6770 + }, + { + "epoch": 0.6620062573328119, + "grad_norm": 24.56501037415868, + "learning_rate": 5e-05, + "loss": 0.0819, + "num_input_tokens_seen": 656554640, + "step": 6771 + }, + { + "epoch": 0.6620062573328119, + "loss": 0.07288956642150879, + "loss_ce": 0.00470566563308239, + "loss_iou": 0.34375, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 656554640, + "step": 6771 + }, + { + "epoch": 0.6621040281579976, + "grad_norm": 32.89026648866738, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 656651456, + "step": 6772 + }, + { + "epoch": 0.6621040281579976, + "loss": 0.06160188466310501, + "loss_ce": 0.0066015757620334625, + "loss_iou": 0.376953125, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 656651456, + "step": 6772 + }, + { + "epoch": 0.6622017989831834, + "grad_norm": 8.163128445704787, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 656748656, + "step": 6773 + }, + { + "epoch": 0.6622017989831834, + "loss": 0.056944847106933594, + "loss_ce": 0.0077657680958509445, + "loss_iou": 0.205078125, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 656748656, + "step": 6773 + }, + { + "epoch": 0.6622995698083692, + "grad_norm": 23.075703839883985, + "learning_rate": 5e-05, + "loss": 0.0718, + "num_input_tokens_seen": 656844980, + "step": 6774 + }, + { + "epoch": 0.6622995698083692, + "loss": 0.03831116482615471, + "loss_ce": 0.0052987756207585335, + "loss_iou": 0.2431640625, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 656844980, + "step": 6774 + }, + { + "epoch": 0.6623973406335549, + "grad_norm": 17.70599930615627, + "learning_rate": 5e-05, + "loss": 0.1143, + "num_input_tokens_seen": 656943344, + "step": 6775 + }, + { + "epoch": 0.6623973406335549, + "loss": 0.0941355973482132, + "loss_ce": 0.008785557001829147, + "loss_iou": 0.330078125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 656943344, + "step": 6775 + }, + { + "epoch": 0.6624951114587407, + "grad_norm": 24.755130900844495, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 657039592, + "step": 6776 + }, + { + "epoch": 0.6624951114587407, + "loss": 0.054216742515563965, + "loss_ce": 0.006319406442344189, + "loss_iou": 0.28515625, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 657039592, + "step": 6776 + }, + { + "epoch": 0.6625928822839264, + "grad_norm": 59.65801143122561, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 657136432, + "step": 6777 + }, + { + "epoch": 0.6625928822839264, + "loss": 0.07042817771434784, + "loss_ce": 0.004254620522260666, + "loss_iou": 0.345703125, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 657136432, + "step": 6777 + }, + { + "epoch": 0.6626906531091122, + "grad_norm": 7.254181333822456, + "learning_rate": 5e-05, + "loss": 0.0968, + "num_input_tokens_seen": 657232808, + "step": 6778 + }, + { + "epoch": 0.6626906531091122, + "loss": 0.11293462663888931, + "loss_ce": 0.005169429816305637, + "loss_iou": 0.2734375, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 657232808, + "step": 6778 + }, + { + "epoch": 0.662788423934298, + "grad_norm": 6.109199053917159, + "learning_rate": 5e-05, + "loss": 0.1043, + "num_input_tokens_seen": 657329776, + "step": 6779 + }, + { + "epoch": 0.662788423934298, + "loss": 0.09697455912828445, + "loss_ce": 0.006169507279992104, + "loss_iou": 0.2734375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 657329776, + "step": 6779 + }, + { + "epoch": 0.6628861947594837, + "grad_norm": 6.989071625123681, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 657426588, + "step": 6780 + }, + { + "epoch": 0.6628861947594837, + "loss": 0.09712295234203339, + "loss_ce": 0.005776213016360998, + "loss_iou": 0.27734375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 657426588, + "step": 6780 + }, + { + "epoch": 0.6629839655846695, + "grad_norm": 7.9786629606020245, + "learning_rate": 5e-05, + "loss": 0.1415, + "num_input_tokens_seen": 657523668, + "step": 6781 + }, + { + "epoch": 0.6629839655846695, + "loss": 0.153366357088089, + "loss_ce": 0.011238368228077888, + "loss_iou": 0.177734375, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 657523668, + "step": 6781 + }, + { + "epoch": 0.6630817364098553, + "grad_norm": 4.116065850155958, + "learning_rate": 5e-05, + "loss": 0.0384, + "num_input_tokens_seen": 657620572, + "step": 6782 + }, + { + "epoch": 0.6630817364098553, + "loss": 0.03814172372221947, + "loss_ce": 0.008444305509328842, + "loss_iou": 0.16796875, + "loss_num": 0.005950927734375, + "loss_xval": 0.0296630859375, + "num_input_tokens_seen": 657620572, + "step": 6782 + }, + { + "epoch": 0.663179507235041, + "grad_norm": 8.094609903946017, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 657717180, + "step": 6783 + }, + { + "epoch": 0.663179507235041, + "loss": 0.04731431230902672, + "loss_ce": 0.0030485650058835745, + "loss_iou": 0.212890625, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 657717180, + "step": 6783 + }, + { + "epoch": 0.6632772780602268, + "grad_norm": 2.1549161490003015, + "learning_rate": 5e-05, + "loss": 0.0552, + "num_input_tokens_seen": 657813968, + "step": 6784 + }, + { + "epoch": 0.6632772780602268, + "loss": 0.05738995596766472, + "loss_ce": 0.005555849522352219, + "loss_iou": 0.26953125, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 657813968, + "step": 6784 + }, + { + "epoch": 0.6633750488854125, + "grad_norm": 2.968951347712876, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 657911040, + "step": 6785 + }, + { + "epoch": 0.6633750488854125, + "loss": 0.08441397547721863, + "loss_ce": 0.0037712783087044954, + "loss_iou": 0.2353515625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 657911040, + "step": 6785 + }, + { + "epoch": 0.6634728197105983, + "grad_norm": 12.874719951298436, + "learning_rate": 5e-05, + "loss": 0.0745, + "num_input_tokens_seen": 658008976, + "step": 6786 + }, + { + "epoch": 0.6634728197105983, + "loss": 0.06170973926782608, + "loss_ce": 0.005168294999748468, + "loss_iou": 0.314453125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 658008976, + "step": 6786 + }, + { + "epoch": 0.6635705905357842, + "grad_norm": 6.033051236891027, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 658105588, + "step": 6787 + }, + { + "epoch": 0.6635705905357842, + "loss": 0.07854336500167847, + "loss_ce": 0.004263576585799456, + "loss_iou": 0.265625, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 658105588, + "step": 6787 + }, + { + "epoch": 0.6636683613609698, + "grad_norm": 5.415025921967951, + "learning_rate": 5e-05, + "loss": 0.0576, + "num_input_tokens_seen": 658202504, + "step": 6788 + }, + { + "epoch": 0.6636683613609698, + "loss": 0.03666841238737106, + "loss_ce": 0.005693070124834776, + "loss_iou": 0.322265625, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 658202504, + "step": 6788 + }, + { + "epoch": 0.6637661321861557, + "grad_norm": 12.732621844432526, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 658298660, + "step": 6789 + }, + { + "epoch": 0.6637661321861557, + "loss": 0.0447823666036129, + "loss_ce": 0.003598893992602825, + "loss_iou": 0.30078125, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 658298660, + "step": 6789 + }, + { + "epoch": 0.6638639030113415, + "grad_norm": 16.014365211245963, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 658395712, + "step": 6790 + }, + { + "epoch": 0.6638639030113415, + "loss": 0.09829885512590408, + "loss_ce": 0.0028627600986510515, + "loss_iou": 0.2451171875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 658395712, + "step": 6790 + }, + { + "epoch": 0.6639616738365272, + "grad_norm": 3.981821693490003, + "learning_rate": 5e-05, + "loss": 0.0615, + "num_input_tokens_seen": 658491956, + "step": 6791 + }, + { + "epoch": 0.6639616738365272, + "loss": 0.060117438435554504, + "loss_ce": 0.013509463518857956, + "loss_iou": 0.2099609375, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 658491956, + "step": 6791 + }, + { + "epoch": 0.664059444661713, + "grad_norm": 5.346745117248553, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 658588968, + "step": 6792 + }, + { + "epoch": 0.664059444661713, + "loss": 0.12012171000242233, + "loss_ce": 0.0034835252445191145, + "loss_iou": 0.3125, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 658588968, + "step": 6792 + }, + { + "epoch": 0.6641572154868987, + "grad_norm": 9.791494216577492, + "learning_rate": 5e-05, + "loss": 0.0861, + "num_input_tokens_seen": 658685424, + "step": 6793 + }, + { + "epoch": 0.6641572154868987, + "loss": 0.0784812867641449, + "loss_ce": 0.00967941153794527, + "loss_iou": 0.314453125, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 658685424, + "step": 6793 + }, + { + "epoch": 0.6642549863120845, + "grad_norm": 12.25869629783992, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 658781696, + "step": 6794 + }, + { + "epoch": 0.6642549863120845, + "loss": 0.10598955303430557, + "loss_ce": 0.015535453334450722, + "loss_iou": 0.251953125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 658781696, + "step": 6794 + }, + { + "epoch": 0.6643527571372703, + "grad_norm": 10.876345774214675, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 658878164, + "step": 6795 + }, + { + "epoch": 0.6643527571372703, + "loss": 0.06444410979747772, + "loss_ce": 0.0022340286523103714, + "loss_iou": 0.2451171875, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 658878164, + "step": 6795 + }, + { + "epoch": 0.664450527962456, + "grad_norm": 6.256986756859457, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 658975652, + "step": 6796 + }, + { + "epoch": 0.664450527962456, + "loss": 0.1019231379032135, + "loss_ce": 0.0052053057588636875, + "loss_iou": 0.26171875, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 658975652, + "step": 6796 + }, + { + "epoch": 0.6645482987876418, + "grad_norm": 9.131878594938629, + "learning_rate": 5e-05, + "loss": 0.0892, + "num_input_tokens_seen": 659072428, + "step": 6797 + }, + { + "epoch": 0.6645482987876418, + "loss": 0.09754376858472824, + "loss_ce": 0.007967047393321991, + "loss_iou": 0.353515625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 659072428, + "step": 6797 + }, + { + "epoch": 0.6646460696128276, + "grad_norm": 15.925907499050588, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 659169692, + "step": 6798 + }, + { + "epoch": 0.6646460696128276, + "loss": 0.0690779760479927, + "loss_ce": 0.004243378061801195, + "loss_iou": 0.287109375, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 659169692, + "step": 6798 + }, + { + "epoch": 0.6647438404380133, + "grad_norm": 19.226629109251583, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 659266472, + "step": 6799 + }, + { + "epoch": 0.6647438404380133, + "loss": 0.07773137837648392, + "loss_ce": 0.005694635212421417, + "loss_iou": 0.23046875, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 659266472, + "step": 6799 + }, + { + "epoch": 0.6648416112631991, + "grad_norm": 27.00295768229681, + "learning_rate": 5e-05, + "loss": 0.0827, + "num_input_tokens_seen": 659364372, + "step": 6800 + }, + { + "epoch": 0.6648416112631991, + "loss": 0.09758153557777405, + "loss_ce": 0.004686027765274048, + "loss_iou": 0.318359375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 659364372, + "step": 6800 + }, + { + "epoch": 0.6649393820883849, + "grad_norm": 19.4817768800548, + "learning_rate": 5e-05, + "loss": 0.0754, + "num_input_tokens_seen": 659461332, + "step": 6801 + }, + { + "epoch": 0.6649393820883849, + "loss": 0.07064821571111679, + "loss_ce": 0.003845237661153078, + "loss_iou": 0.3515625, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 659461332, + "step": 6801 + }, + { + "epoch": 0.6650371529135706, + "grad_norm": 6.073807182439097, + "learning_rate": 5e-05, + "loss": 0.0996, + "num_input_tokens_seen": 659557632, + "step": 6802 + }, + { + "epoch": 0.6650371529135706, + "loss": 0.062115613371133804, + "loss_ce": 0.009533828124403954, + "loss_iou": 0.236328125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 659557632, + "step": 6802 + }, + { + "epoch": 0.6651349237387564, + "grad_norm": 3.8597581085141317, + "learning_rate": 5e-05, + "loss": 0.0896, + "num_input_tokens_seen": 659653308, + "step": 6803 + }, + { + "epoch": 0.6651349237387564, + "loss": 0.09140429645776749, + "loss_ce": 0.003452627919614315, + "loss_iou": 0.30078125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 659653308, + "step": 6803 + }, + { + "epoch": 0.6652326945639421, + "grad_norm": 10.896251424442765, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 659750680, + "step": 6804 + }, + { + "epoch": 0.6652326945639421, + "loss": 0.06773677468299866, + "loss_ce": 0.003199727274477482, + "loss_iou": 0.24609375, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 659750680, + "step": 6804 + }, + { + "epoch": 0.6653304653891279, + "grad_norm": 9.590184302261662, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 659847572, + "step": 6805 + }, + { + "epoch": 0.6653304653891279, + "loss": 0.0931951031088829, + "loss_ce": 0.0056249164044857025, + "loss_iou": 0.322265625, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 659847572, + "step": 6805 + }, + { + "epoch": 0.6654282362143137, + "grad_norm": 3.732776597828167, + "learning_rate": 5e-05, + "loss": 0.1134, + "num_input_tokens_seen": 659944804, + "step": 6806 + }, + { + "epoch": 0.6654282362143137, + "loss": 0.11084601283073425, + "loss_ce": 0.006872624158859253, + "loss_iou": 0.31640625, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 659944804, + "step": 6806 + }, + { + "epoch": 0.6655260070394994, + "grad_norm": 14.277284454247484, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 660042704, + "step": 6807 + }, + { + "epoch": 0.6655260070394994, + "loss": 0.06899616122245789, + "loss_ce": 0.006038401275873184, + "loss_iou": 0.466796875, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 660042704, + "step": 6807 + }, + { + "epoch": 0.6656237778646852, + "grad_norm": 4.3556329709643755, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 660139912, + "step": 6808 + }, + { + "epoch": 0.6656237778646852, + "loss": 0.06074546277523041, + "loss_ce": 0.005356060806661844, + "loss_iou": 0.42578125, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 660139912, + "step": 6808 + }, + { + "epoch": 0.665721548689871, + "grad_norm": 5.289520144803969, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 660235704, + "step": 6809 + }, + { + "epoch": 0.665721548689871, + "loss": 0.07025967538356781, + "loss_ce": 0.008141148835420609, + "loss_iou": 0.359375, + "loss_num": 0.012451171875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 660235704, + "step": 6809 + }, + { + "epoch": 0.6658193195150567, + "grad_norm": 3.4413873007722176, + "learning_rate": 5e-05, + "loss": 0.0643, + "num_input_tokens_seen": 660332348, + "step": 6810 + }, + { + "epoch": 0.6658193195150567, + "loss": 0.06306593120098114, + "loss_ce": 0.002259661443531513, + "loss_iou": 0.2578125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 660332348, + "step": 6810 + }, + { + "epoch": 0.6659170903402425, + "grad_norm": 18.19075766122084, + "learning_rate": 5e-05, + "loss": 0.0968, + "num_input_tokens_seen": 660429800, + "step": 6811 + }, + { + "epoch": 0.6659170903402425, + "loss": 0.08806513994932175, + "loss_ce": 0.0018529875669628382, + "loss_iou": 0.3046875, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 660429800, + "step": 6811 + }, + { + "epoch": 0.6660148611654282, + "grad_norm": 4.542382785129801, + "learning_rate": 5e-05, + "loss": 0.0914, + "num_input_tokens_seen": 660526448, + "step": 6812 + }, + { + "epoch": 0.6660148611654282, + "loss": 0.09036967158317566, + "loss_ce": 0.004882305860519409, + "loss_iou": 0.33203125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 660526448, + "step": 6812 + }, + { + "epoch": 0.666112631990614, + "grad_norm": 13.594503377712755, + "learning_rate": 5e-05, + "loss": 0.0881, + "num_input_tokens_seen": 660623000, + "step": 6813 + }, + { + "epoch": 0.666112631990614, + "loss": 0.09957198053598404, + "loss_ce": 0.006371296942234039, + "loss_iou": 0.357421875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 660623000, + "step": 6813 + }, + { + "epoch": 0.6662104028157998, + "grad_norm": 2.1513206780372425, + "learning_rate": 5e-05, + "loss": 0.0675, + "num_input_tokens_seen": 660719464, + "step": 6814 + }, + { + "epoch": 0.6662104028157998, + "loss": 0.05553455650806427, + "loss_ce": 0.009784892201423645, + "loss_iou": 0.275390625, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 660719464, + "step": 6814 + }, + { + "epoch": 0.6663081736409855, + "grad_norm": 7.9173708883847675, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 660816124, + "step": 6815 + }, + { + "epoch": 0.6663081736409855, + "loss": 0.06940934807062149, + "loss_ce": 0.0044221654534339905, + "loss_iou": 0.2294921875, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 660816124, + "step": 6815 + }, + { + "epoch": 0.6664059444661713, + "grad_norm": 4.458831349998917, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 660913076, + "step": 6816 + }, + { + "epoch": 0.6664059444661713, + "loss": 0.06722983717918396, + "loss_ce": 0.004775617271661758, + "loss_iou": 0.314453125, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 660913076, + "step": 6816 + }, + { + "epoch": 0.6665037152913571, + "grad_norm": 7.417279146779955, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 661009992, + "step": 6817 + }, + { + "epoch": 0.6665037152913571, + "loss": 0.0905953049659729, + "loss_ce": 0.0044594439677894115, + "loss_iou": 0.279296875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 661009992, + "step": 6817 + }, + { + "epoch": 0.6666014861165428, + "grad_norm": 8.11725089477291, + "learning_rate": 5e-05, + "loss": 0.0558, + "num_input_tokens_seen": 661106208, + "step": 6818 + }, + { + "epoch": 0.6666014861165428, + "loss": 0.06253989040851593, + "loss_ce": 0.00786764919757843, + "loss_iou": 0.203125, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 661106208, + "step": 6818 + }, + { + "epoch": 0.6666992569417286, + "grad_norm": 4.404264114707281, + "learning_rate": 5e-05, + "loss": 0.0607, + "num_input_tokens_seen": 661203396, + "step": 6819 + }, + { + "epoch": 0.6666992569417286, + "loss": 0.07713790982961655, + "loss_ce": 0.004139863885939121, + "loss_iou": 0.259765625, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 661203396, + "step": 6819 + }, + { + "epoch": 0.6667970277669144, + "grad_norm": 1.6144314647166353, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 661300108, + "step": 6820 + }, + { + "epoch": 0.6667970277669144, + "loss": 0.08004754781723022, + "loss_ce": 0.004638606682419777, + "loss_iou": 0.2578125, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 661300108, + "step": 6820 + }, + { + "epoch": 0.6668947985921001, + "grad_norm": 11.825067695019847, + "learning_rate": 5e-05, + "loss": 0.0935, + "num_input_tokens_seen": 661397788, + "step": 6821 + }, + { + "epoch": 0.6668947985921001, + "loss": 0.1288580447435379, + "loss_ce": 0.008229686878621578, + "loss_iou": 0.267578125, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 661397788, + "step": 6821 + }, + { + "epoch": 0.6669925694172859, + "grad_norm": 15.266643164305039, + "learning_rate": 5e-05, + "loss": 0.1052, + "num_input_tokens_seen": 661495524, + "step": 6822 + }, + { + "epoch": 0.6669925694172859, + "loss": 0.11379817128181458, + "loss_ce": 0.001752882613800466, + "loss_iou": 0.275390625, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 661495524, + "step": 6822 + }, + { + "epoch": 0.6670903402424716, + "grad_norm": 4.091017361372495, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 661592312, + "step": 6823 + }, + { + "epoch": 0.6670903402424716, + "loss": 0.08122500777244568, + "loss_ce": 0.006365382112562656, + "loss_iou": 0.3046875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 661592312, + "step": 6823 + }, + { + "epoch": 0.6671881110676574, + "grad_norm": 10.94622464425254, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 661689892, + "step": 6824 + }, + { + "epoch": 0.6671881110676574, + "loss": 0.09013959020376205, + "loss_ce": 0.004369937349110842, + "loss_iou": 0.27734375, + "loss_num": 0.01708984375, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 661689892, + "step": 6824 + }, + { + "epoch": 0.6672858818928432, + "grad_norm": 2.807817361614259, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 661787124, + "step": 6825 + }, + { + "epoch": 0.6672858818928432, + "loss": 0.07968218624591827, + "loss_ce": 0.008652516640722752, + "loss_iou": 0.30859375, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 661787124, + "step": 6825 + }, + { + "epoch": 0.6673836527180289, + "grad_norm": 22.308515405287213, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 661883872, + "step": 6826 + }, + { + "epoch": 0.6673836527180289, + "loss": 0.05997800827026367, + "loss_ce": 0.005656715482473373, + "loss_iou": 0.296875, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 661883872, + "step": 6826 + }, + { + "epoch": 0.6674814235432147, + "grad_norm": 35.92095419983195, + "learning_rate": 5e-05, + "loss": 0.0986, + "num_input_tokens_seen": 661980420, + "step": 6827 + }, + { + "epoch": 0.6674814235432147, + "loss": 0.10122748464345932, + "loss_ce": 0.004456243012100458, + "loss_iou": 0.330078125, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 661980420, + "step": 6827 + }, + { + "epoch": 0.6675791943684005, + "grad_norm": 11.38159405556596, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 662077632, + "step": 6828 + }, + { + "epoch": 0.6675791943684005, + "loss": 0.09461678564548492, + "loss_ce": 0.005661855451762676, + "loss_iou": 0.271484375, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 662077632, + "step": 6828 + }, + { + "epoch": 0.6676769651935862, + "grad_norm": 13.739016885363514, + "learning_rate": 5e-05, + "loss": 0.0875, + "num_input_tokens_seen": 662175040, + "step": 6829 + }, + { + "epoch": 0.6676769651935862, + "loss": 0.08172006160020828, + "loss_ce": 0.008065886795520782, + "loss_iou": 0.28515625, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 662175040, + "step": 6829 + }, + { + "epoch": 0.667774736018772, + "grad_norm": 9.89396383365772, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 662272172, + "step": 6830 + }, + { + "epoch": 0.667774736018772, + "loss": 0.05681592598557472, + "loss_ce": 0.0045087989419698715, + "loss_iou": 0.30859375, + "loss_num": 0.010498046875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 662272172, + "step": 6830 + }, + { + "epoch": 0.6678725068439577, + "grad_norm": 11.801998115681057, + "learning_rate": 5e-05, + "loss": 0.0677, + "num_input_tokens_seen": 662369304, + "step": 6831 + }, + { + "epoch": 0.6678725068439577, + "loss": 0.07061048597097397, + "loss_ce": 0.002205334836617112, + "loss_iou": 0.384765625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 662369304, + "step": 6831 + }, + { + "epoch": 0.6679702776691435, + "grad_norm": 7.804760465894901, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 662465188, + "step": 6832 + }, + { + "epoch": 0.6679702776691435, + "loss": 0.07258061319589615, + "loss_ce": 0.00843266025185585, + "loss_iou": 0.23046875, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 662465188, + "step": 6832 + }, + { + "epoch": 0.6680680484943293, + "grad_norm": 3.216204056789273, + "learning_rate": 5e-05, + "loss": 0.0604, + "num_input_tokens_seen": 662562508, + "step": 6833 + }, + { + "epoch": 0.6680680484943293, + "loss": 0.06700673699378967, + "loss_ce": 0.0016457121819257736, + "loss_iou": 0.349609375, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 662562508, + "step": 6833 + }, + { + "epoch": 0.668165819319515, + "grad_norm": 6.590094282623956, + "learning_rate": 5e-05, + "loss": 0.1223, + "num_input_tokens_seen": 662659196, + "step": 6834 + }, + { + "epoch": 0.668165819319515, + "loss": 0.1344483196735382, + "loss_ce": 0.012927333824336529, + "loss_iou": 0.2265625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 662659196, + "step": 6834 + }, + { + "epoch": 0.6682635901447008, + "grad_norm": 5.338710391533873, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 662756792, + "step": 6835 + }, + { + "epoch": 0.6682635901447008, + "loss": 0.05561155453324318, + "loss_ce": 0.006249373313039541, + "loss_iou": 0.34375, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 662756792, + "step": 6835 + }, + { + "epoch": 0.6683613609698866, + "grad_norm": 3.950572618467998, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 662854324, + "step": 6836 + }, + { + "epoch": 0.6683613609698866, + "loss": 0.04729504883289337, + "loss_ce": 0.005562263540923595, + "loss_iou": 0.314453125, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 662854324, + "step": 6836 + }, + { + "epoch": 0.6684591317950723, + "grad_norm": 3.8341886703773658, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 662951540, + "step": 6837 + }, + { + "epoch": 0.6684591317950723, + "loss": 0.0958087146282196, + "loss_ce": 0.004103390499949455, + "loss_iou": 0.37890625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 662951540, + "step": 6837 + }, + { + "epoch": 0.6685569026202581, + "grad_norm": 7.246336867047222, + "learning_rate": 5e-05, + "loss": 0.0563, + "num_input_tokens_seen": 663048816, + "step": 6838 + }, + { + "epoch": 0.6685569026202581, + "loss": 0.06118746101856232, + "loss_ce": 0.003722863271832466, + "loss_iou": 0.37890625, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 663048816, + "step": 6838 + }, + { + "epoch": 0.6686546734454438, + "grad_norm": 12.946276098345384, + "learning_rate": 5e-05, + "loss": 0.1084, + "num_input_tokens_seen": 663145888, + "step": 6839 + }, + { + "epoch": 0.6686546734454438, + "loss": 0.12564224004745483, + "loss_ce": 0.012689052149653435, + "loss_iou": 0.306640625, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 663145888, + "step": 6839 + }, + { + "epoch": 0.6687524442706296, + "grad_norm": 3.1349948219517523, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 663242584, + "step": 6840 + }, + { + "epoch": 0.6687524442706296, + "loss": 0.08305054903030396, + "loss_ce": 0.004277048632502556, + "loss_iou": 0.23046875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 663242584, + "step": 6840 + }, + { + "epoch": 0.6688502150958154, + "grad_norm": 4.99662037788251, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 663339656, + "step": 6841 + }, + { + "epoch": 0.6688502150958154, + "loss": 0.1254083216190338, + "loss_ce": 0.008434455841779709, + "loss_iou": 0.267578125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 663339656, + "step": 6841 + }, + { + "epoch": 0.6689479859210011, + "grad_norm": 9.260599585601426, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 663437252, + "step": 6842 + }, + { + "epoch": 0.6689479859210011, + "loss": 0.06025801971554756, + "loss_ce": 0.0029154892545193434, + "loss_iou": 0.28515625, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 663437252, + "step": 6842 + }, + { + "epoch": 0.6690457567461869, + "grad_norm": 2.616230059772282, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 663533640, + "step": 6843 + }, + { + "epoch": 0.6690457567461869, + "loss": 0.05845971778035164, + "loss_ce": 0.0020021956879645586, + "loss_iou": 0.32421875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 663533640, + "step": 6843 + }, + { + "epoch": 0.6691435275713727, + "grad_norm": 1.8236952159043558, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 663631172, + "step": 6844 + }, + { + "epoch": 0.6691435275713727, + "loss": 0.0946069210767746, + "loss_ce": 0.010256333276629448, + "loss_iou": 0.27734375, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 663631172, + "step": 6844 + }, + { + "epoch": 0.6692412983965584, + "grad_norm": 5.845406655195463, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 663727344, + "step": 6845 + }, + { + "epoch": 0.6692412983965584, + "loss": 0.05849912017583847, + "loss_ce": 0.004711886867880821, + "loss_iou": 0.189453125, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 663727344, + "step": 6845 + }, + { + "epoch": 0.6693390692217442, + "grad_norm": 9.036053728616205, + "learning_rate": 5e-05, + "loss": 0.0928, + "num_input_tokens_seen": 663824608, + "step": 6846 + }, + { + "epoch": 0.6693390692217442, + "loss": 0.07129115611314774, + "loss_ce": 0.0061132400296628475, + "loss_iou": 0.275390625, + "loss_num": 0.0130615234375, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 663824608, + "step": 6846 + }, + { + "epoch": 0.66943684004693, + "grad_norm": 2.769609493040041, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 663922676, + "step": 6847 + }, + { + "epoch": 0.66943684004693, + "loss": 0.058442093431949615, + "loss_ce": 0.004776929970830679, + "loss_iou": 0.240234375, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 663922676, + "step": 6847 + }, + { + "epoch": 0.6695346108721157, + "grad_norm": 9.190031076121244, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 664019460, + "step": 6848 + }, + { + "epoch": 0.6695346108721157, + "loss": 0.09121585637331009, + "loss_ce": 0.0038516572676599026, + "loss_iou": 0.193359375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 664019460, + "step": 6848 + }, + { + "epoch": 0.6696323816973015, + "grad_norm": 20.722067986889993, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 664116556, + "step": 6849 + }, + { + "epoch": 0.6696323816973015, + "loss": 0.06432844698429108, + "loss_ce": 0.009366288781166077, + "loss_iou": 0.28515625, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 664116556, + "step": 6849 + }, + { + "epoch": 0.6697301525224872, + "grad_norm": 6.825975667664756, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 664213156, + "step": 6850 + }, + { + "epoch": 0.6697301525224872, + "loss": 0.08919531106948853, + "loss_ce": 0.003730839118361473, + "loss_iou": 0.29296875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 664213156, + "step": 6850 + }, + { + "epoch": 0.669827923347673, + "grad_norm": 5.887925995943624, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 664310640, + "step": 6851 + }, + { + "epoch": 0.669827923347673, + "loss": 0.06856606155633926, + "loss_ce": 0.006233911961317062, + "loss_iou": 0.322265625, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 664310640, + "step": 6851 + }, + { + "epoch": 0.6699256941728589, + "grad_norm": 9.908598717572017, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 664407716, + "step": 6852 + }, + { + "epoch": 0.6699256941728589, + "loss": 0.05872625857591629, + "loss_ce": 0.003901429008692503, + "loss_iou": 0.2578125, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 664407716, + "step": 6852 + }, + { + "epoch": 0.6700234649980445, + "grad_norm": 7.561670871124879, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 664505104, + "step": 6853 + }, + { + "epoch": 0.6700234649980445, + "loss": 0.07417212426662445, + "loss_ce": 0.002974621020257473, + "loss_iou": 0.328125, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 664505104, + "step": 6853 + }, + { + "epoch": 0.6701212358232304, + "grad_norm": 3.2632117274059573, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 664602660, + "step": 6854 + }, + { + "epoch": 0.6701212358232304, + "loss": 0.09264571964740753, + "loss_ce": 0.007410123012959957, + "loss_iou": 0.39453125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 664602660, + "step": 6854 + }, + { + "epoch": 0.6702190066484162, + "grad_norm": 9.53227763319134, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 664699692, + "step": 6855 + }, + { + "epoch": 0.6702190066484162, + "loss": 0.07436380535364151, + "loss_ce": 0.00427064998075366, + "loss_iou": 0.2236328125, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 664699692, + "step": 6855 + }, + { + "epoch": 0.6703167774736019, + "grad_norm": 8.37260676119313, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 664796724, + "step": 6856 + }, + { + "epoch": 0.6703167774736019, + "loss": 0.09182891994714737, + "loss_ce": 0.01064453087747097, + "loss_iou": 0.275390625, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 664796724, + "step": 6856 + }, + { + "epoch": 0.6704145482987877, + "grad_norm": 2.405675617822302, + "learning_rate": 5e-05, + "loss": 0.0552, + "num_input_tokens_seen": 664893464, + "step": 6857 + }, + { + "epoch": 0.6704145482987877, + "loss": 0.04853604733943939, + "loss_ce": 0.0047890981659293175, + "loss_iou": 0.26953125, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 664893464, + "step": 6857 + }, + { + "epoch": 0.6705123191239734, + "grad_norm": 3.9324064242366603, + "learning_rate": 5e-05, + "loss": 0.0477, + "num_input_tokens_seen": 664990620, + "step": 6858 + }, + { + "epoch": 0.6705123191239734, + "loss": 0.036703482270240784, + "loss_ce": 0.002210987964645028, + "loss_iou": 0.18359375, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 664990620, + "step": 6858 + }, + { + "epoch": 0.6706100899491592, + "grad_norm": 2.963925612896743, + "learning_rate": 5e-05, + "loss": 0.0817, + "num_input_tokens_seen": 665087924, + "step": 6859 + }, + { + "epoch": 0.6706100899491592, + "loss": 0.08647044003009796, + "loss_ce": 0.004713849630206823, + "loss_iou": 0.3046875, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 665087924, + "step": 6859 + }, + { + "epoch": 0.670707860774345, + "grad_norm": 6.986435796494868, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 665184644, + "step": 6860 + }, + { + "epoch": 0.670707860774345, + "loss": 0.03997710347175598, + "loss_ce": 0.0007925317622721195, + "loss_iou": 0.2314453125, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 665184644, + "step": 6860 + }, + { + "epoch": 0.6708056315995307, + "grad_norm": 3.7479593399806217, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 665281548, + "step": 6861 + }, + { + "epoch": 0.6708056315995307, + "loss": 0.07621105760335922, + "loss_ce": 0.004326902329921722, + "loss_iou": 0.34765625, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 665281548, + "step": 6861 + }, + { + "epoch": 0.6709034024247165, + "grad_norm": 7.28516604025705, + "learning_rate": 5e-05, + "loss": 0.0632, + "num_input_tokens_seen": 665377632, + "step": 6862 + }, + { + "epoch": 0.6709034024247165, + "loss": 0.05701260268688202, + "loss_ce": 0.011886640451848507, + "loss_iou": 0.1953125, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 665377632, + "step": 6862 + }, + { + "epoch": 0.6710011732499023, + "grad_norm": 16.512615152328127, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 665474880, + "step": 6863 + }, + { + "epoch": 0.6710011732499023, + "loss": 0.08032669872045517, + "loss_ce": 0.0036970609799027443, + "loss_iou": 0.349609375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 665474880, + "step": 6863 + }, + { + "epoch": 0.671098944075088, + "grad_norm": 9.917152717913135, + "learning_rate": 5e-05, + "loss": 0.064, + "num_input_tokens_seen": 665571276, + "step": 6864 + }, + { + "epoch": 0.671098944075088, + "loss": 0.0859321653842926, + "loss_ce": 0.004446415230631828, + "loss_iou": 0.30859375, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 665571276, + "step": 6864 + }, + { + "epoch": 0.6711967149002738, + "grad_norm": 12.12555353018536, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 665668628, + "step": 6865 + }, + { + "epoch": 0.6711967149002738, + "loss": 0.08332741260528564, + "loss_ce": 0.002638931619003415, + "loss_iou": 0.275390625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 665668628, + "step": 6865 + }, + { + "epoch": 0.6712944857254596, + "grad_norm": 14.846393902003484, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 665765620, + "step": 6866 + }, + { + "epoch": 0.6712944857254596, + "loss": 0.09523018449544907, + "loss_ce": 0.006973348557949066, + "loss_iou": 0.2734375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 665765620, + "step": 6866 + }, + { + "epoch": 0.6713922565506453, + "grad_norm": 10.582671690081531, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 665862404, + "step": 6867 + }, + { + "epoch": 0.6713922565506453, + "loss": 0.07618740946054459, + "loss_ce": 0.0047839051112532616, + "loss_iou": 0.34765625, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 665862404, + "step": 6867 + }, + { + "epoch": 0.6714900273758311, + "grad_norm": 7.388369819959703, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 665957596, + "step": 6868 + }, + { + "epoch": 0.6714900273758311, + "loss": 0.09170264005661011, + "loss_ce": 0.004971680231392384, + "loss_iou": 0.3125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 665957596, + "step": 6868 + }, + { + "epoch": 0.6715877982010168, + "grad_norm": 7.401362996912333, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 666054604, + "step": 6869 + }, + { + "epoch": 0.6715877982010168, + "loss": 0.10290344059467316, + "loss_ce": 0.011701662093400955, + "loss_iou": 0.326171875, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 666054604, + "step": 6869 + }, + { + "epoch": 0.6716855690262026, + "grad_norm": 9.892489762127552, + "learning_rate": 5e-05, + "loss": 0.073, + "num_input_tokens_seen": 666151752, + "step": 6870 + }, + { + "epoch": 0.6716855690262026, + "loss": 0.08830264210700989, + "loss_ce": 0.008087190799415112, + "loss_iou": 0.2470703125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 666151752, + "step": 6870 + }, + { + "epoch": 0.6717833398513884, + "grad_norm": 5.05675812061652, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 666248656, + "step": 6871 + }, + { + "epoch": 0.6717833398513884, + "loss": 0.08724057674407959, + "loss_ce": 0.002581002889201045, + "loss_iou": 0.29296875, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 666248656, + "step": 6871 + }, + { + "epoch": 0.6718811106765741, + "grad_norm": 13.365960101903548, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 666345708, + "step": 6872 + }, + { + "epoch": 0.6718811106765741, + "loss": 0.05659317970275879, + "loss_ce": 0.0021803397685289383, + "loss_iou": 0.326171875, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 666345708, + "step": 6872 + }, + { + "epoch": 0.6719788815017599, + "grad_norm": 19.19079383197589, + "learning_rate": 5e-05, + "loss": 0.0599, + "num_input_tokens_seen": 666443376, + "step": 6873 + }, + { + "epoch": 0.6719788815017599, + "loss": 0.06295615434646606, + "loss_ce": 0.004408176988363266, + "loss_iou": 0.29296875, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 666443376, + "step": 6873 + }, + { + "epoch": 0.6720766523269457, + "grad_norm": 11.383007136530617, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 666539584, + "step": 6874 + }, + { + "epoch": 0.6720766523269457, + "loss": 0.11825805902481079, + "loss_ce": 0.006655283737927675, + "loss_iou": 0.359375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 666539584, + "step": 6874 + }, + { + "epoch": 0.6721744231521314, + "grad_norm": 30.723550405313276, + "learning_rate": 5e-05, + "loss": 0.0918, + "num_input_tokens_seen": 666636576, + "step": 6875 + }, + { + "epoch": 0.6721744231521314, + "loss": 0.07192954421043396, + "loss_ce": 0.0026699048466980457, + "loss_iou": 0.2431640625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 666636576, + "step": 6875 + }, + { + "epoch": 0.6722721939773172, + "grad_norm": 3.1338718396847254, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 666734424, + "step": 6876 + }, + { + "epoch": 0.6722721939773172, + "loss": 0.10309606045484543, + "loss_ce": 0.006294309627264738, + "loss_iou": 0.28515625, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 666734424, + "step": 6876 + }, + { + "epoch": 0.6723699648025029, + "grad_norm": 8.18335956682759, + "learning_rate": 5e-05, + "loss": 0.1085, + "num_input_tokens_seen": 666830940, + "step": 6877 + }, + { + "epoch": 0.6723699648025029, + "loss": 0.07534876465797424, + "loss_ce": 0.006333262659609318, + "loss_iou": 0.306640625, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 666830940, + "step": 6877 + }, + { + "epoch": 0.6724677356276887, + "grad_norm": 2.5112017882299904, + "learning_rate": 5e-05, + "loss": 0.0514, + "num_input_tokens_seen": 666927936, + "step": 6878 + }, + { + "epoch": 0.6724677356276887, + "loss": 0.0665460079908371, + "loss_ce": 0.005022571422159672, + "loss_iou": 0.220703125, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 666927936, + "step": 6878 + }, + { + "epoch": 0.6725655064528745, + "grad_norm": 5.898544118223534, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 667024456, + "step": 6879 + }, + { + "epoch": 0.6725655064528745, + "loss": 0.06347671151161194, + "loss_ce": 0.009719999507069588, + "loss_iou": 0.31640625, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 667024456, + "step": 6879 + }, + { + "epoch": 0.6726632772780602, + "grad_norm": 9.203450265990623, + "learning_rate": 5e-05, + "loss": 0.0971, + "num_input_tokens_seen": 667122072, + "step": 6880 + }, + { + "epoch": 0.6726632772780602, + "loss": 0.07986149191856384, + "loss_ce": 0.004864538088440895, + "loss_iou": 0.37890625, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 667122072, + "step": 6880 + }, + { + "epoch": 0.672761048103246, + "grad_norm": 9.999330661305795, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 667218896, + "step": 6881 + }, + { + "epoch": 0.672761048103246, + "loss": 0.07234521210193634, + "loss_ce": 0.007312256842851639, + "loss_iou": 0.296875, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 667218896, + "step": 6881 + }, + { + "epoch": 0.6728588189284318, + "grad_norm": 5.450778463989202, + "learning_rate": 5e-05, + "loss": 0.0937, + "num_input_tokens_seen": 667315828, + "step": 6882 + }, + { + "epoch": 0.6728588189284318, + "loss": 0.08155152946710587, + "loss_ce": 0.0061883688904345036, + "loss_iou": 0.248046875, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 667315828, + "step": 6882 + }, + { + "epoch": 0.6729565897536175, + "grad_norm": 7.800522435350111, + "learning_rate": 5e-05, + "loss": 0.0492, + "num_input_tokens_seen": 667412892, + "step": 6883 + }, + { + "epoch": 0.6729565897536175, + "loss": 0.07156701385974884, + "loss_ce": 0.003909542225301266, + "loss_iou": 0.302734375, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 667412892, + "step": 6883 + }, + { + "epoch": 0.6730543605788033, + "grad_norm": 4.069042052131702, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 667508904, + "step": 6884 + }, + { + "epoch": 0.6730543605788033, + "loss": 0.07750121504068375, + "loss_ce": 0.009462270885705948, + "loss_iou": 0.267578125, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 667508904, + "step": 6884 + }, + { + "epoch": 0.673152131403989, + "grad_norm": 5.902081783870663, + "learning_rate": 5e-05, + "loss": 0.0912, + "num_input_tokens_seen": 667605776, + "step": 6885 + }, + { + "epoch": 0.673152131403989, + "loss": 0.07975434511899948, + "loss_ce": 0.004955760203301907, + "loss_iou": 0.328125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 667605776, + "step": 6885 + }, + { + "epoch": 0.6732499022291748, + "grad_norm": 4.381930174966153, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 667702992, + "step": 6886 + }, + { + "epoch": 0.6732499022291748, + "loss": 0.08438239991664886, + "loss_ce": 0.0031293416395783424, + "loss_iou": 0.37109375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 667702992, + "step": 6886 + }, + { + "epoch": 0.6733476730543606, + "grad_norm": 13.602617003320047, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 667800416, + "step": 6887 + }, + { + "epoch": 0.6733476730543606, + "loss": 0.06580718606710434, + "loss_ce": 0.0015524220652878284, + "loss_iou": 0.251953125, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 667800416, + "step": 6887 + }, + { + "epoch": 0.6734454438795463, + "grad_norm": 9.395585281134695, + "learning_rate": 5e-05, + "loss": 0.1081, + "num_input_tokens_seen": 667898612, + "step": 6888 + }, + { + "epoch": 0.6734454438795463, + "loss": 0.09355851262807846, + "loss_ce": 0.006430827081203461, + "loss_iou": 0.390625, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 667898612, + "step": 6888 + }, + { + "epoch": 0.6735432147047321, + "grad_norm": 8.692694255797763, + "learning_rate": 5e-05, + "loss": 0.0834, + "num_input_tokens_seen": 667995432, + "step": 6889 + }, + { + "epoch": 0.6735432147047321, + "loss": 0.06079268828034401, + "loss_ce": 0.0042741321958601475, + "loss_iou": 0.2197265625, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 667995432, + "step": 6889 + }, + { + "epoch": 0.6736409855299179, + "grad_norm": 31.16128694473133, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 668091564, + "step": 6890 + }, + { + "epoch": 0.6736409855299179, + "loss": 0.07171188294887543, + "loss_ce": 0.005771024618297815, + "loss_iou": 0.203125, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 668091564, + "step": 6890 + }, + { + "epoch": 0.6737387563551036, + "grad_norm": 11.498317764081463, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 668188228, + "step": 6891 + }, + { + "epoch": 0.6737387563551036, + "loss": 0.0952029898762703, + "loss_ce": 0.009570667520165443, + "loss_iou": 0.2236328125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 668188228, + "step": 6891 + }, + { + "epoch": 0.6738365271802894, + "grad_norm": 9.790505635406324, + "learning_rate": 5e-05, + "loss": 0.0989, + "num_input_tokens_seen": 668285880, + "step": 6892 + }, + { + "epoch": 0.6738365271802894, + "loss": 0.10402710735797882, + "loss_ce": 0.005272223148494959, + "loss_iou": 0.294921875, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 668285880, + "step": 6892 + }, + { + "epoch": 0.6739342980054752, + "grad_norm": 8.291777571659619, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 668382540, + "step": 6893 + }, + { + "epoch": 0.6739342980054752, + "loss": 0.0410863533616066, + "loss_ce": 0.006158985197544098, + "loss_iou": 0.22265625, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 668382540, + "step": 6893 + }, + { + "epoch": 0.6740320688306609, + "grad_norm": 2.4545589229882516, + "learning_rate": 5e-05, + "loss": 0.06, + "num_input_tokens_seen": 668479516, + "step": 6894 + }, + { + "epoch": 0.6740320688306609, + "loss": 0.054081134498119354, + "loss_ce": 0.004055195488035679, + "loss_iou": 0.279296875, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 668479516, + "step": 6894 + }, + { + "epoch": 0.6741298396558467, + "grad_norm": 9.222748606085847, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 668577260, + "step": 6895 + }, + { + "epoch": 0.6741298396558467, + "loss": 0.06876832246780396, + "loss_ce": 0.0075348010286688805, + "loss_iou": 0.318359375, + "loss_num": 0.01220703125, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 668577260, + "step": 6895 + }, + { + "epoch": 0.6742276104810324, + "grad_norm": 6.796033904779886, + "learning_rate": 5e-05, + "loss": 0.0714, + "num_input_tokens_seen": 668674708, + "step": 6896 + }, + { + "epoch": 0.6742276104810324, + "loss": 0.07472015172243118, + "loss_ce": 0.004606014583259821, + "loss_iou": 0.314453125, + "loss_num": 0.01397705078125, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 668674708, + "step": 6896 + }, + { + "epoch": 0.6743253813062182, + "grad_norm": 7.516229853237005, + "learning_rate": 5e-05, + "loss": 0.0932, + "num_input_tokens_seen": 668771840, + "step": 6897 + }, + { + "epoch": 0.6743253813062182, + "loss": 0.07371395081281662, + "loss_ce": 0.005369832739233971, + "loss_iou": 0.296875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 668771840, + "step": 6897 + }, + { + "epoch": 0.674423152131404, + "grad_norm": 7.830612132970985, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 668868728, + "step": 6898 + }, + { + "epoch": 0.674423152131404, + "loss": 0.10124947130680084, + "loss_ce": 0.007034085690975189, + "loss_iou": 0.302734375, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 668868728, + "step": 6898 + }, + { + "epoch": 0.6745209229565897, + "grad_norm": 12.935828448905143, + "learning_rate": 5e-05, + "loss": 0.0681, + "num_input_tokens_seen": 668965916, + "step": 6899 + }, + { + "epoch": 0.6745209229565897, + "loss": 0.08068737387657166, + "loss_ce": 0.0038288480136543512, + "loss_iou": 0.28515625, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 668965916, + "step": 6899 + }, + { + "epoch": 0.6746186937817755, + "grad_norm": 9.822286630095453, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 669062436, + "step": 6900 + }, + { + "epoch": 0.6746186937817755, + "loss": 0.04714019596576691, + "loss_ce": 0.003084256313741207, + "loss_iou": 0.232421875, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 669062436, + "step": 6900 + }, + { + "epoch": 0.6747164646069613, + "grad_norm": 12.987033639934802, + "learning_rate": 5e-05, + "loss": 0.0718, + "num_input_tokens_seen": 669159812, + "step": 6901 + }, + { + "epoch": 0.6747164646069613, + "loss": 0.06637410819530487, + "loss_ce": 0.006170555483549833, + "loss_iou": 0.189453125, + "loss_num": 0.01202392578125, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 669159812, + "step": 6901 + }, + { + "epoch": 0.674814235432147, + "grad_norm": 7.820952106220772, + "learning_rate": 5e-05, + "loss": 0.0614, + "num_input_tokens_seen": 669256820, + "step": 6902 + }, + { + "epoch": 0.674814235432147, + "loss": 0.0656345784664154, + "loss_ce": 0.005713312886655331, + "loss_iou": 0.41015625, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 669256820, + "step": 6902 + }, + { + "epoch": 0.6749120062573328, + "grad_norm": 6.1995307021480865, + "learning_rate": 5e-05, + "loss": 0.0929, + "num_input_tokens_seen": 669353244, + "step": 6903 + }, + { + "epoch": 0.6749120062573328, + "loss": 0.09395914524793625, + "loss_ce": 0.008006380870938301, + "loss_iou": 0.26953125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 669353244, + "step": 6903 + }, + { + "epoch": 0.6750097770825185, + "grad_norm": 7.356500039451525, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 669450312, + "step": 6904 + }, + { + "epoch": 0.6750097770825185, + "loss": 0.0636368989944458, + "loss_ce": 0.0016862101620063186, + "loss_iou": 0.294921875, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 669450312, + "step": 6904 + }, + { + "epoch": 0.6751075479077043, + "grad_norm": 14.985828325642709, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 669547896, + "step": 6905 + }, + { + "epoch": 0.6751075479077043, + "loss": 0.07302489876747131, + "loss_ce": 0.004295495338737965, + "loss_iou": 0.26171875, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 669547896, + "step": 6905 + }, + { + "epoch": 0.6752053187328901, + "grad_norm": 10.980306507057644, + "learning_rate": 5e-05, + "loss": 0.0612, + "num_input_tokens_seen": 669644488, + "step": 6906 + }, + { + "epoch": 0.6752053187328901, + "loss": 0.040880586951971054, + "loss_ce": 0.0036720316857099533, + "loss_iou": 0.33203125, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 669644488, + "step": 6906 + }, + { + "epoch": 0.6753030895580758, + "grad_norm": 3.810481250096831, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 669740884, + "step": 6907 + }, + { + "epoch": 0.6753030895580758, + "loss": 0.08442066609859467, + "loss_ce": 0.0037932205013930798, + "loss_iou": 0.32421875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 669740884, + "step": 6907 + }, + { + "epoch": 0.6754008603832616, + "grad_norm": 11.771223427279724, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 669838724, + "step": 6908 + }, + { + "epoch": 0.6754008603832616, + "loss": 0.06142263486981392, + "loss_ce": 0.0028899156022816896, + "loss_iou": 0.34765625, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 669838724, + "step": 6908 + }, + { + "epoch": 0.6754986312084474, + "grad_norm": 9.558415767313482, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 669936072, + "step": 6909 + }, + { + "epoch": 0.6754986312084474, + "loss": 0.08822180330753326, + "loss_ce": 0.0019943849183619022, + "loss_iou": 0.330078125, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 669936072, + "step": 6909 + }, + { + "epoch": 0.6755964020336331, + "grad_norm": 8.904525620812201, + "learning_rate": 5e-05, + "loss": 0.0647, + "num_input_tokens_seen": 670032700, + "step": 6910 + }, + { + "epoch": 0.6755964020336331, + "loss": 0.0838981568813324, + "loss_ce": 0.0035911500453948975, + "loss_iou": 0.388671875, + "loss_num": 0.01611328125, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 670032700, + "step": 6910 + }, + { + "epoch": 0.6756941728588189, + "grad_norm": 20.47027858598866, + "learning_rate": 5e-05, + "loss": 0.1421, + "num_input_tokens_seen": 670130412, + "step": 6911 + }, + { + "epoch": 0.6756941728588189, + "loss": 0.13763095438480377, + "loss_ce": 0.011486547067761421, + "loss_iou": 0.296875, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 670130412, + "step": 6911 + }, + { + "epoch": 0.6757919436840047, + "grad_norm": 14.518625557861732, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 670227776, + "step": 6912 + }, + { + "epoch": 0.6757919436840047, + "loss": 0.06561537086963654, + "loss_ce": 0.00598402414470911, + "loss_iou": 0.265625, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 670227776, + "step": 6912 + }, + { + "epoch": 0.6758897145091904, + "grad_norm": 2.2073926122990373, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 670324424, + "step": 6913 + }, + { + "epoch": 0.6758897145091904, + "loss": 0.07484273612499237, + "loss_ce": 0.004987998399883509, + "loss_iou": 0.234375, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 670324424, + "step": 6913 + }, + { + "epoch": 0.6759874853343762, + "grad_norm": 6.0797172900220815, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 670421492, + "step": 6914 + }, + { + "epoch": 0.6759874853343762, + "loss": 0.080593541264534, + "loss_ce": 0.007900668308138847, + "loss_iou": 0.2236328125, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 670421492, + "step": 6914 + }, + { + "epoch": 0.6760852561595619, + "grad_norm": 7.545448917325495, + "learning_rate": 5e-05, + "loss": 0.0645, + "num_input_tokens_seen": 670517920, + "step": 6915 + }, + { + "epoch": 0.6760852561595619, + "loss": 0.06256268918514252, + "loss_ce": 0.005494819954037666, + "loss_iou": 0.291015625, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 670517920, + "step": 6915 + }, + { + "epoch": 0.6761830269847477, + "grad_norm": 20.73240542035375, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 670615232, + "step": 6916 + }, + { + "epoch": 0.6761830269847477, + "loss": 0.06512514501810074, + "loss_ce": 0.007813135161995888, + "loss_iou": 0.296875, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 670615232, + "step": 6916 + }, + { + "epoch": 0.6762807978099336, + "grad_norm": 9.233425908241896, + "learning_rate": 5e-05, + "loss": 0.1005, + "num_input_tokens_seen": 670712920, + "step": 6917 + }, + { + "epoch": 0.6762807978099336, + "loss": 0.11401937901973724, + "loss_ce": 0.004156096838414669, + "loss_iou": 0.423828125, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 670712920, + "step": 6917 + }, + { + "epoch": 0.6763785686351192, + "grad_norm": 6.278144549368412, + "learning_rate": 5e-05, + "loss": 0.0861, + "num_input_tokens_seen": 670809752, + "step": 6918 + }, + { + "epoch": 0.6763785686351192, + "loss": 0.07007212936878204, + "loss_ce": 0.005111650563776493, + "loss_iou": 0.322265625, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 670809752, + "step": 6918 + }, + { + "epoch": 0.676476339460305, + "grad_norm": 12.777132237235557, + "learning_rate": 5e-05, + "loss": 0.0918, + "num_input_tokens_seen": 670906772, + "step": 6919 + }, + { + "epoch": 0.676476339460305, + "loss": 0.09165976196527481, + "loss_ce": 0.0037996515166014433, + "loss_iou": 0.30859375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 670906772, + "step": 6919 + }, + { + "epoch": 0.6765741102854909, + "grad_norm": 22.1220609514143, + "learning_rate": 5e-05, + "loss": 0.0978, + "num_input_tokens_seen": 671004068, + "step": 6920 + }, + { + "epoch": 0.6765741102854909, + "loss": 0.12306101620197296, + "loss_ce": 0.004469705745577812, + "loss_iou": 0.28125, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 671004068, + "step": 6920 + }, + { + "epoch": 0.6766718811106766, + "grad_norm": 25.27383335980737, + "learning_rate": 5e-05, + "loss": 0.0599, + "num_input_tokens_seen": 671100504, + "step": 6921 + }, + { + "epoch": 0.6766718811106766, + "loss": 0.055628418922424316, + "loss_ce": 0.0023447286803275347, + "loss_iou": 0.328125, + "loss_num": 0.01068115234375, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 671100504, + "step": 6921 + }, + { + "epoch": 0.6767696519358624, + "grad_norm": 3.425910461915379, + "learning_rate": 5e-05, + "loss": 0.0645, + "num_input_tokens_seen": 671198456, + "step": 6922 + }, + { + "epoch": 0.6767696519358624, + "loss": 0.0926692932844162, + "loss_ce": 0.0095851831138134, + "loss_iou": 0.4453125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 671198456, + "step": 6922 + }, + { + "epoch": 0.676867422761048, + "grad_norm": 8.439511574093148, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 671295992, + "step": 6923 + }, + { + "epoch": 0.676867422761048, + "loss": 0.10672856122255325, + "loss_ce": 0.0061731403693556786, + "loss_iou": 0.251953125, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 671295992, + "step": 6923 + }, + { + "epoch": 0.6769651935862339, + "grad_norm": 13.730958968654877, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 671393804, + "step": 6924 + }, + { + "epoch": 0.6769651935862339, + "loss": 0.0825139582157135, + "loss_ce": 0.00591483898460865, + "loss_iou": 0.2001953125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 671393804, + "step": 6924 + }, + { + "epoch": 0.6770629644114197, + "grad_norm": 26.02808007352479, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 671490500, + "step": 6925 + }, + { + "epoch": 0.6770629644114197, + "loss": 0.08581827580928802, + "loss_ce": 0.004214276093989611, + "loss_iou": 0.271484375, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 671490500, + "step": 6925 + }, + { + "epoch": 0.6771607352366054, + "grad_norm": 5.7559845278570485, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 671587292, + "step": 6926 + }, + { + "epoch": 0.6771607352366054, + "loss": 0.034998804330825806, + "loss_ce": 0.004404929466545582, + "loss_iou": 0.203125, + "loss_num": 0.006134033203125, + "loss_xval": 0.0306396484375, + "num_input_tokens_seen": 671587292, + "step": 6926 + }, + { + "epoch": 0.6772585060617912, + "grad_norm": 12.2911019102713, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 671684092, + "step": 6927 + }, + { + "epoch": 0.6772585060617912, + "loss": 0.07707913219928741, + "loss_ce": 0.006339387036859989, + "loss_iou": 0.306640625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 671684092, + "step": 6927 + }, + { + "epoch": 0.677356276886977, + "grad_norm": 6.155224349226487, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 671780804, + "step": 6928 + }, + { + "epoch": 0.677356276886977, + "loss": 0.09031468629837036, + "loss_ce": 0.0067575592547655106, + "loss_iou": 0.32421875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 671780804, + "step": 6928 + }, + { + "epoch": 0.6774540477121627, + "grad_norm": 8.136138260414675, + "learning_rate": 5e-05, + "loss": 0.1163, + "num_input_tokens_seen": 671877400, + "step": 6929 + }, + { + "epoch": 0.6774540477121627, + "loss": 0.12544932961463928, + "loss_ce": 0.006720697972923517, + "loss_iou": 0.2119140625, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 671877400, + "step": 6929 + }, + { + "epoch": 0.6775518185373485, + "grad_norm": 4.277252520671936, + "learning_rate": 5e-05, + "loss": 0.0821, + "num_input_tokens_seen": 671975532, + "step": 6930 + }, + { + "epoch": 0.6775518185373485, + "loss": 0.10590005666017532, + "loss_ce": 0.00583292031660676, + "loss_iou": 0.34375, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 671975532, + "step": 6930 + }, + { + "epoch": 0.6776495893625342, + "grad_norm": 7.602235849163225, + "learning_rate": 5e-05, + "loss": 0.0921, + "num_input_tokens_seen": 672072376, + "step": 6931 + }, + { + "epoch": 0.6776495893625342, + "loss": 0.077644944190979, + "loss_ce": 0.008247976191341877, + "loss_iou": 0.28125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 672072376, + "step": 6931 + }, + { + "epoch": 0.67774736018772, + "grad_norm": 5.008434176659398, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 672169616, + "step": 6932 + }, + { + "epoch": 0.67774736018772, + "loss": 0.08728599548339844, + "loss_ce": 0.0036296856123954058, + "loss_iou": 0.2578125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 672169616, + "step": 6932 + }, + { + "epoch": 0.6778451310129058, + "grad_norm": 15.481655509728057, + "learning_rate": 5e-05, + "loss": 0.0954, + "num_input_tokens_seen": 672266448, + "step": 6933 + }, + { + "epoch": 0.6778451310129058, + "loss": 0.09398958086967468, + "loss_ce": 0.004054274410009384, + "loss_iou": 0.318359375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 672266448, + "step": 6933 + }, + { + "epoch": 0.6779429018380915, + "grad_norm": 14.002468521951636, + "learning_rate": 5e-05, + "loss": 0.1049, + "num_input_tokens_seen": 672364080, + "step": 6934 + }, + { + "epoch": 0.6779429018380915, + "loss": 0.12341984361410141, + "loss_ce": 0.004721716977655888, + "loss_iou": 0.314453125, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 672364080, + "step": 6934 + }, + { + "epoch": 0.6780406726632773, + "grad_norm": 36.866501656355084, + "learning_rate": 5e-05, + "loss": 0.0531, + "num_input_tokens_seen": 672461420, + "step": 6935 + }, + { + "epoch": 0.6780406726632773, + "loss": 0.06009839475154877, + "loss_ce": 0.0026185396127402782, + "loss_iou": 0.341796875, + "loss_num": 0.01153564453125, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 672461420, + "step": 6935 + }, + { + "epoch": 0.6781384434884631, + "grad_norm": 28.365650169988882, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 672558700, + "step": 6936 + }, + { + "epoch": 0.6781384434884631, + "loss": 0.058925509452819824, + "loss_ce": 0.00667941477149725, + "loss_iou": 0.419921875, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 672558700, + "step": 6936 + }, + { + "epoch": 0.6782362143136488, + "grad_norm": 13.982551562499793, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 672655840, + "step": 6937 + }, + { + "epoch": 0.6782362143136488, + "loss": 0.04395611956715584, + "loss_ce": 0.0017197931883856654, + "loss_iou": 0.328125, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 672655840, + "step": 6937 + }, + { + "epoch": 0.6783339851388346, + "grad_norm": 5.864242486412339, + "learning_rate": 5e-05, + "loss": 0.1018, + "num_input_tokens_seen": 672752548, + "step": 6938 + }, + { + "epoch": 0.6783339851388346, + "loss": 0.10221479088068008, + "loss_ce": 0.006763432174921036, + "loss_iou": 0.25, + "loss_num": 0.0191650390625, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 672752548, + "step": 6938 + }, + { + "epoch": 0.6784317559640204, + "grad_norm": 3.137821150192085, + "learning_rate": 5e-05, + "loss": 0.0587, + "num_input_tokens_seen": 672849288, + "step": 6939 + }, + { + "epoch": 0.6784317559640204, + "loss": 0.04104926437139511, + "loss_ce": 0.0054581379517912865, + "loss_iou": 0.373046875, + "loss_num": 0.007110595703125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 672849288, + "step": 6939 + }, + { + "epoch": 0.6785295267892061, + "grad_norm": 9.866294473505825, + "learning_rate": 5e-05, + "loss": 0.1113, + "num_input_tokens_seen": 672946612, + "step": 6940 + }, + { + "epoch": 0.6785295267892061, + "loss": 0.13517136871814728, + "loss_ce": 0.008401348255574703, + "loss_iou": 0.322265625, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 672946612, + "step": 6940 + }, + { + "epoch": 0.6786272976143919, + "grad_norm": 4.67800144591025, + "learning_rate": 5e-05, + "loss": 0.0967, + "num_input_tokens_seen": 673043988, + "step": 6941 + }, + { + "epoch": 0.6786272976143919, + "loss": 0.10059104859828949, + "loss_ce": 0.006306990049779415, + "loss_iou": 0.279296875, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 673043988, + "step": 6941 + }, + { + "epoch": 0.6787250684395776, + "grad_norm": 13.891589240062418, + "learning_rate": 5e-05, + "loss": 0.0938, + "num_input_tokens_seen": 673140848, + "step": 6942 + }, + { + "epoch": 0.6787250684395776, + "loss": 0.09327731281518936, + "loss_ce": 0.004852630663663149, + "loss_iou": 0.3046875, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 673140848, + "step": 6942 + }, + { + "epoch": 0.6788228392647634, + "grad_norm": 6.7545166254658, + "learning_rate": 5e-05, + "loss": 0.0916, + "num_input_tokens_seen": 673237396, + "step": 6943 + }, + { + "epoch": 0.6788228392647634, + "loss": 0.07862923294305801, + "loss_ce": 0.005478602834045887, + "loss_iou": 0.36328125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 673237396, + "step": 6943 + }, + { + "epoch": 0.6789206100899492, + "grad_norm": 13.520430301391615, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 673333756, + "step": 6944 + }, + { + "epoch": 0.6789206100899492, + "loss": 0.042506877332925797, + "loss_ce": 0.0023152276407927275, + "loss_iou": 0.26171875, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 673333756, + "step": 6944 + }, + { + "epoch": 0.6790183809151349, + "grad_norm": 4.639512904444045, + "learning_rate": 5e-05, + "loss": 0.05, + "num_input_tokens_seen": 673430140, + "step": 6945 + }, + { + "epoch": 0.6790183809151349, + "loss": 0.041772156953811646, + "loss_ce": 0.009080199524760246, + "loss_iou": 0.283203125, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 673430140, + "step": 6945 + }, + { + "epoch": 0.6791161517403207, + "grad_norm": 8.794332569484721, + "learning_rate": 5e-05, + "loss": 0.0644, + "num_input_tokens_seen": 673527412, + "step": 6946 + }, + { + "epoch": 0.6791161517403207, + "loss": 0.05600547045469284, + "loss_ce": 0.002355565782636404, + "loss_iou": 0.357421875, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 673527412, + "step": 6946 + }, + { + "epoch": 0.6792139225655065, + "grad_norm": 4.185920219317279, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 673624080, + "step": 6947 + }, + { + "epoch": 0.6792139225655065, + "loss": 0.1220097690820694, + "loss_ce": 0.012405882589519024, + "loss_iou": 0.30078125, + "loss_num": 0.02197265625, + "loss_xval": 0.109375, + "num_input_tokens_seen": 673624080, + "step": 6947 + }, + { + "epoch": 0.6793116933906922, + "grad_norm": 21.687313954117794, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 673720168, + "step": 6948 + }, + { + "epoch": 0.6793116933906922, + "loss": 0.10728016495704651, + "loss_ce": 0.007335095666348934, + "loss_iou": 0.220703125, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 673720168, + "step": 6948 + }, + { + "epoch": 0.679409464215878, + "grad_norm": 15.098632254560135, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 673817176, + "step": 6949 + }, + { + "epoch": 0.679409464215878, + "loss": 0.08542738854885101, + "loss_ce": 0.0017634439282119274, + "loss_iou": 0.31640625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 673817176, + "step": 6949 + }, + { + "epoch": 0.6795072350410637, + "grad_norm": 4.389227353172858, + "learning_rate": 5e-05, + "loss": 0.0682, + "num_input_tokens_seen": 673914044, + "step": 6950 + }, + { + "epoch": 0.6795072350410637, + "loss": 0.07844407856464386, + "loss_ce": 0.006300525739789009, + "loss_iou": 0.267578125, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 673914044, + "step": 6950 + }, + { + "epoch": 0.6796050058662495, + "grad_norm": 7.725378608187825, + "learning_rate": 5e-05, + "loss": 0.1077, + "num_input_tokens_seen": 674011332, + "step": 6951 + }, + { + "epoch": 0.6796050058662495, + "loss": 0.10103273391723633, + "loss_ce": 0.007809162605553865, + "loss_iou": 0.30859375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 674011332, + "step": 6951 + }, + { + "epoch": 0.6797027766914353, + "grad_norm": 4.773083235310845, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 674108592, + "step": 6952 + }, + { + "epoch": 0.6797027766914353, + "loss": 0.0797165185213089, + "loss_ce": 0.0065277391113340855, + "loss_iou": 0.2373046875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 674108592, + "step": 6952 + }, + { + "epoch": 0.679800547516621, + "grad_norm": 2.722350985280003, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 674205536, + "step": 6953 + }, + { + "epoch": 0.679800547516621, + "loss": 0.08927246183156967, + "loss_ce": 0.004143674857914448, + "loss_iou": 0.205078125, + "loss_num": 0.01708984375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 674205536, + "step": 6953 + }, + { + "epoch": 0.6798983183418068, + "grad_norm": 5.417204095811182, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 674302272, + "step": 6954 + }, + { + "epoch": 0.6798983183418068, + "loss": 0.0711660087108612, + "loss_ce": 0.005690541118383408, + "loss_iou": 0.306640625, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 674302272, + "step": 6954 + }, + { + "epoch": 0.6799960891669926, + "grad_norm": 6.597359588737543, + "learning_rate": 5e-05, + "loss": 0.0695, + "num_input_tokens_seen": 674398300, + "step": 6955 + }, + { + "epoch": 0.6799960891669926, + "loss": 0.07610997557640076, + "loss_ce": 0.004660698119550943, + "loss_iou": 0.193359375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 674398300, + "step": 6955 + }, + { + "epoch": 0.6800938599921783, + "grad_norm": 3.183227693734313, + "learning_rate": 5e-05, + "loss": 0.0944, + "num_input_tokens_seen": 674495268, + "step": 6956 + }, + { + "epoch": 0.6800938599921783, + "loss": 0.055460818111896515, + "loss_ce": 0.0069836461916565895, + "loss_iou": 0.33203125, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 674495268, + "step": 6956 + }, + { + "epoch": 0.6801916308173641, + "grad_norm": 7.849661301524069, + "learning_rate": 5e-05, + "loss": 0.0862, + "num_input_tokens_seen": 674591656, + "step": 6957 + }, + { + "epoch": 0.6801916308173641, + "loss": 0.08455289900302887, + "loss_ce": 0.011371748521924019, + "loss_iou": 0.1962890625, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 674591656, + "step": 6957 + }, + { + "epoch": 0.6802894016425499, + "grad_norm": 9.878771543701859, + "learning_rate": 5e-05, + "loss": 0.111, + "num_input_tokens_seen": 674688284, + "step": 6958 + }, + { + "epoch": 0.6802894016425499, + "loss": 0.13998925685882568, + "loss_ce": 0.00491845840588212, + "loss_iou": 0.28125, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 674688284, + "step": 6958 + }, + { + "epoch": 0.6803871724677356, + "grad_norm": 19.70633079288112, + "learning_rate": 5e-05, + "loss": 0.1011, + "num_input_tokens_seen": 674784544, + "step": 6959 + }, + { + "epoch": 0.6803871724677356, + "loss": 0.058745406568050385, + "loss_ce": 0.003928209654986858, + "loss_iou": 0.263671875, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 674784544, + "step": 6959 + }, + { + "epoch": 0.6804849432929214, + "grad_norm": 8.991811100552628, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 674881364, + "step": 6960 + }, + { + "epoch": 0.6804849432929214, + "loss": 0.08717472851276398, + "loss_ce": 0.00383122731000185, + "loss_iou": 0.380859375, + "loss_num": 0.0166015625, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 674881364, + "step": 6960 + }, + { + "epoch": 0.6805827141181071, + "grad_norm": 3.5342376240592834, + "learning_rate": 5e-05, + "loss": 0.0537, + "num_input_tokens_seen": 674976904, + "step": 6961 + }, + { + "epoch": 0.6805827141181071, + "loss": 0.06041795015335083, + "loss_ce": 0.008812726475298405, + "loss_iou": 0.27734375, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 674976904, + "step": 6961 + }, + { + "epoch": 0.6806804849432929, + "grad_norm": 9.189285325523011, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 675074040, + "step": 6962 + }, + { + "epoch": 0.6806804849432929, + "loss": 0.07149596512317657, + "loss_ce": 0.00432676961645484, + "loss_iou": 0.375, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 675074040, + "step": 6962 + }, + { + "epoch": 0.6807782557684787, + "grad_norm": 13.511544811269339, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 675170480, + "step": 6963 + }, + { + "epoch": 0.6807782557684787, + "loss": 0.05592453479766846, + "loss_ce": 0.0029155011288821697, + "loss_iou": 0.251953125, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 675170480, + "step": 6963 + }, + { + "epoch": 0.6808760265936644, + "grad_norm": 4.907143305010663, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 675266376, + "step": 6964 + }, + { + "epoch": 0.6808760265936644, + "loss": 0.07615697383880615, + "loss_ce": 0.0051578339189291, + "loss_iou": 0.2001953125, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 675266376, + "step": 6964 + }, + { + "epoch": 0.6809737974188502, + "grad_norm": 5.708323525596756, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 675363192, + "step": 6965 + }, + { + "epoch": 0.6809737974188502, + "loss": 0.04548150673508644, + "loss_ce": 0.0064209140837192535, + "loss_iou": 0.2578125, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 675363192, + "step": 6965 + }, + { + "epoch": 0.681071568244036, + "grad_norm": 3.1938607778659858, + "learning_rate": 5e-05, + "loss": 0.0872, + "num_input_tokens_seen": 675459688, + "step": 6966 + }, + { + "epoch": 0.681071568244036, + "loss": 0.041376855224370956, + "loss_ce": 0.0033328793942928314, + "loss_iou": 0.26953125, + "loss_num": 0.007598876953125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 675459688, + "step": 6966 + }, + { + "epoch": 0.6811693390692217, + "grad_norm": 8.628986940664673, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 675556320, + "step": 6967 + }, + { + "epoch": 0.6811693390692217, + "loss": 0.06785301864147186, + "loss_ce": 0.003521962556988001, + "loss_iou": 0.2041015625, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 675556320, + "step": 6967 + }, + { + "epoch": 0.6812671098944075, + "grad_norm": 16.266939227063347, + "learning_rate": 5e-05, + "loss": 0.0742, + "num_input_tokens_seen": 675652916, + "step": 6968 + }, + { + "epoch": 0.6812671098944075, + "loss": 0.07996928691864014, + "loss_ce": 0.00848186481744051, + "loss_iou": 0.33203125, + "loss_num": 0.01434326171875, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 675652916, + "step": 6968 + }, + { + "epoch": 0.6813648807195932, + "grad_norm": 26.691010417453757, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 675750104, + "step": 6969 + }, + { + "epoch": 0.6813648807195932, + "loss": 0.05496416240930557, + "loss_ce": 0.008428670465946198, + "loss_iou": 0.1884765625, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 675750104, + "step": 6969 + }, + { + "epoch": 0.681462651544779, + "grad_norm": 6.583826227827703, + "learning_rate": 5e-05, + "loss": 0.0819, + "num_input_tokens_seen": 675848172, + "step": 6970 + }, + { + "epoch": 0.681462651544779, + "loss": 0.06777921319007874, + "loss_ce": 0.0033718696795403957, + "loss_iou": 0.380859375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 675848172, + "step": 6970 + }, + { + "epoch": 0.6815604223699648, + "grad_norm": 4.027061655359469, + "learning_rate": 5e-05, + "loss": 0.0976, + "num_input_tokens_seen": 675945656, + "step": 6971 + }, + { + "epoch": 0.6815604223699648, + "loss": 0.09684920310974121, + "loss_ce": 0.009141682647168636, + "loss_iou": 0.2373046875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 675945656, + "step": 6971 + }, + { + "epoch": 0.6816581931951505, + "grad_norm": 3.5259974371457297, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 676042024, + "step": 6972 + }, + { + "epoch": 0.6816581931951505, + "loss": 0.06322314590215683, + "loss_ce": 0.00781848095357418, + "loss_iou": 0.318359375, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 676042024, + "step": 6972 + }, + { + "epoch": 0.6817559640203363, + "grad_norm": 5.775551527934953, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 676139344, + "step": 6973 + }, + { + "epoch": 0.6817559640203363, + "loss": 0.07472586631774902, + "loss_ce": 0.007495642639696598, + "loss_iou": 0.330078125, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 676139344, + "step": 6973 + }, + { + "epoch": 0.6818537348455221, + "grad_norm": 11.424893774126888, + "learning_rate": 5e-05, + "loss": 0.0554, + "num_input_tokens_seen": 676236068, + "step": 6974 + }, + { + "epoch": 0.6818537348455221, + "loss": 0.06535109877586365, + "loss_ce": 0.004544827155768871, + "loss_iou": 0.345703125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 676236068, + "step": 6974 + }, + { + "epoch": 0.6819515056707078, + "grad_norm": 16.09582059415431, + "learning_rate": 5e-05, + "loss": 0.0573, + "num_input_tokens_seen": 676332556, + "step": 6975 + }, + { + "epoch": 0.6819515056707078, + "loss": 0.07016681879758835, + "loss_ce": 0.008750196546316147, + "loss_iou": 0.259765625, + "loss_num": 0.01226806640625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 676332556, + "step": 6975 + }, + { + "epoch": 0.6820492764958936, + "grad_norm": 10.440631368743132, + "learning_rate": 5e-05, + "loss": 0.0532, + "num_input_tokens_seen": 676430028, + "step": 6976 + }, + { + "epoch": 0.6820492764958936, + "loss": 0.04587685689330101, + "loss_ce": 0.003434537909924984, + "loss_iou": 0.298828125, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 676430028, + "step": 6976 + }, + { + "epoch": 0.6821470473210793, + "grad_norm": 5.634331184799721, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 676528008, + "step": 6977 + }, + { + "epoch": 0.6821470473210793, + "loss": 0.0746239423751831, + "loss_ce": 0.002877113875001669, + "loss_iou": 0.4453125, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 676528008, + "step": 6977 + }, + { + "epoch": 0.6822448181462651, + "grad_norm": 3.815938916728236, + "learning_rate": 5e-05, + "loss": 0.0658, + "num_input_tokens_seen": 676624652, + "step": 6978 + }, + { + "epoch": 0.6822448181462651, + "loss": 0.07685376703739166, + "loss_ce": 0.00175000901799649, + "loss_iou": 0.423828125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 676624652, + "step": 6978 + }, + { + "epoch": 0.682342588971451, + "grad_norm": 16.03375571429564, + "learning_rate": 5e-05, + "loss": 0.0821, + "num_input_tokens_seen": 676721652, + "step": 6979 + }, + { + "epoch": 0.682342588971451, + "loss": 0.07792429625988007, + "loss_ce": 0.0021186256781220436, + "loss_iou": 0.2421875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 676721652, + "step": 6979 + }, + { + "epoch": 0.6824403597966366, + "grad_norm": 7.25196235110427, + "learning_rate": 5e-05, + "loss": 0.0514, + "num_input_tokens_seen": 676818856, + "step": 6980 + }, + { + "epoch": 0.6824403597966366, + "loss": 0.05190765857696533, + "loss_ce": 0.002892614807933569, + "loss_iou": 0.29296875, + "loss_num": 0.009765625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 676818856, + "step": 6980 + }, + { + "epoch": 0.6825381306218224, + "grad_norm": 2.483183749164345, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 676916076, + "step": 6981 + }, + { + "epoch": 0.6825381306218224, + "loss": 0.07008148729801178, + "loss_ce": 0.008176584728062153, + "loss_iou": 0.2294921875, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 676916076, + "step": 6981 + }, + { + "epoch": 0.6826359014470083, + "grad_norm": 6.223636327345843, + "learning_rate": 5e-05, + "loss": 0.0521, + "num_input_tokens_seen": 677012764, + "step": 6982 + }, + { + "epoch": 0.6826359014470083, + "loss": 0.06207321956753731, + "loss_ce": 0.0031208908185362816, + "loss_iou": 0.232421875, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 677012764, + "step": 6982 + }, + { + "epoch": 0.682733672272194, + "grad_norm": 14.255361156296773, + "learning_rate": 5e-05, + "loss": 0.0879, + "num_input_tokens_seen": 677110284, + "step": 6983 + }, + { + "epoch": 0.682733672272194, + "loss": 0.091289222240448, + "loss_ce": 0.005496677476912737, + "loss_iou": 0.271484375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 677110284, + "step": 6983 + }, + { + "epoch": 0.6828314430973798, + "grad_norm": 1.8657103450241546, + "learning_rate": 5e-05, + "loss": 0.0821, + "num_input_tokens_seen": 677206520, + "step": 6984 + }, + { + "epoch": 0.6828314430973798, + "loss": 0.0926678404211998, + "loss_ce": 0.005593561567366123, + "loss_iou": 0.30859375, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 677206520, + "step": 6984 + }, + { + "epoch": 0.6829292139225656, + "grad_norm": 27.236865546996427, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 677303440, + "step": 6985 + }, + { + "epoch": 0.6829292139225656, + "loss": 0.0418865792453289, + "loss_ce": 0.004929793532937765, + "loss_iou": 0.265625, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 677303440, + "step": 6985 + }, + { + "epoch": 0.6830269847477513, + "grad_norm": 14.093389846968014, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 677400012, + "step": 6986 + }, + { + "epoch": 0.6830269847477513, + "loss": 0.06857489049434662, + "loss_ce": 0.005250923801213503, + "loss_iou": 0.23828125, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 677400012, + "step": 6986 + }, + { + "epoch": 0.6831247555729371, + "grad_norm": 13.111701578770967, + "learning_rate": 5e-05, + "loss": 0.0701, + "num_input_tokens_seen": 677495712, + "step": 6987 + }, + { + "epoch": 0.6831247555729371, + "loss": 0.0686403289437294, + "loss_ce": 0.009935950860381126, + "loss_iou": 0.21875, + "loss_num": 0.01177978515625, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 677495712, + "step": 6987 + }, + { + "epoch": 0.6832225263981228, + "grad_norm": 9.065247537312603, + "learning_rate": 5e-05, + "loss": 0.0961, + "num_input_tokens_seen": 677593184, + "step": 6988 + }, + { + "epoch": 0.6832225263981228, + "loss": 0.09551571309566498, + "loss_ce": 0.007808196358382702, + "loss_iou": 0.283203125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 677593184, + "step": 6988 + }, + { + "epoch": 0.6833202972233086, + "grad_norm": 13.885943463228212, + "learning_rate": 5e-05, + "loss": 0.0489, + "num_input_tokens_seen": 677690212, + "step": 6989 + }, + { + "epoch": 0.6833202972233086, + "loss": 0.060051191598176956, + "loss_ce": 0.00536369439214468, + "loss_iou": 0.28125, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 677690212, + "step": 6989 + }, + { + "epoch": 0.6834180680484944, + "grad_norm": 4.13779540120169, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 677786544, + "step": 6990 + }, + { + "epoch": 0.6834180680484944, + "loss": 0.041663385927677155, + "loss_ce": 0.006278255023062229, + "loss_iou": 0.27734375, + "loss_num": 0.007080078125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 677786544, + "step": 6990 + }, + { + "epoch": 0.6835158388736801, + "grad_norm": 5.418515423653866, + "learning_rate": 5e-05, + "loss": 0.0821, + "num_input_tokens_seen": 677883100, + "step": 6991 + }, + { + "epoch": 0.6835158388736801, + "loss": 0.11688001453876495, + "loss_ce": 0.005307753570377827, + "loss_iou": 0.26953125, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 677883100, + "step": 6991 + }, + { + "epoch": 0.6836136096988659, + "grad_norm": 5.911658691487832, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 677980300, + "step": 6992 + }, + { + "epoch": 0.6836136096988659, + "loss": 0.043577827513217926, + "loss_ce": 0.004324591718614101, + "loss_iou": 0.2734375, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 677980300, + "step": 6992 + }, + { + "epoch": 0.6837113805240517, + "grad_norm": 15.632166128561904, + "learning_rate": 5e-05, + "loss": 0.1089, + "num_input_tokens_seen": 678078148, + "step": 6993 + }, + { + "epoch": 0.6837113805240517, + "loss": 0.08009345829486847, + "loss_ce": 0.0056458329781889915, + "loss_iou": 0.359375, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 678078148, + "step": 6993 + }, + { + "epoch": 0.6838091513492374, + "grad_norm": 13.90575374101859, + "learning_rate": 5e-05, + "loss": 0.0787, + "num_input_tokens_seen": 678174656, + "step": 6994 + }, + { + "epoch": 0.6838091513492374, + "loss": 0.06974294781684875, + "loss_ce": 0.005381374154239893, + "loss_iou": 0.38671875, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 678174656, + "step": 6994 + }, + { + "epoch": 0.6839069221744232, + "grad_norm": 7.668371831843061, + "learning_rate": 5e-05, + "loss": 0.0695, + "num_input_tokens_seen": 678271768, + "step": 6995 + }, + { + "epoch": 0.6839069221744232, + "loss": 0.06748317182064056, + "loss_ce": 0.0036861717235296965, + "loss_iou": 0.384765625, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 678271768, + "step": 6995 + }, + { + "epoch": 0.6840046929996089, + "grad_norm": 15.385403011505087, + "learning_rate": 5e-05, + "loss": 0.0787, + "num_input_tokens_seen": 678368524, + "step": 6996 + }, + { + "epoch": 0.6840046929996089, + "loss": 0.09411794692277908, + "loss_ce": 0.0032213453669101, + "loss_iou": 0.3671875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 678368524, + "step": 6996 + }, + { + "epoch": 0.6841024638247947, + "grad_norm": 12.592873530896727, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 678465444, + "step": 6997 + }, + { + "epoch": 0.6841024638247947, + "loss": 0.05807969346642494, + "loss_ce": 0.002919170306995511, + "loss_iou": 0.279296875, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 678465444, + "step": 6997 + }, + { + "epoch": 0.6842002346499805, + "grad_norm": 14.969841579035013, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 678562820, + "step": 6998 + }, + { + "epoch": 0.6842002346499805, + "loss": 0.06959697604179382, + "loss_ce": 0.00821086298674345, + "loss_iou": 0.404296875, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 678562820, + "step": 6998 + }, + { + "epoch": 0.6842980054751662, + "grad_norm": 25.420287286149605, + "learning_rate": 5e-05, + "loss": 0.1148, + "num_input_tokens_seen": 678659532, + "step": 6999 + }, + { + "epoch": 0.6842980054751662, + "loss": 0.1178719624876976, + "loss_ce": 0.003617962822318077, + "loss_iou": 0.2001953125, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 678659532, + "step": 6999 + }, + { + "epoch": 0.684395776300352, + "grad_norm": 8.020041774293775, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 678756208, + "step": 7000 + }, + { + "epoch": 0.684395776300352, + "eval_seeclick_CIoU": 0.4975247383117676, + "eval_seeclick_GIoU": 0.5004715770483017, + "eval_seeclick_IoU": 0.5418936461210251, + "eval_seeclick_MAE_all": 0.0787421315908432, + "eval_seeclick_MAE_h": 0.05451355129480362, + "eval_seeclick_MAE_w": 0.10029840841889381, + "eval_seeclick_MAE_x": 0.10591598227620125, + "eval_seeclick_MAE_y": 0.05424055829644203, + "eval_seeclick_NUM_probability": 0.9999982714653015, + "eval_seeclick_inside_bbox": 0.7855113744735718, + "eval_seeclick_loss": 0.2594563663005829, + "eval_seeclick_loss_ce": 0.009565770160406828, + "eval_seeclick_loss_iou": 0.38360595703125, + "eval_seeclick_loss_num": 0.0489501953125, + "eval_seeclick_loss_xval": 0.244659423828125, + "eval_seeclick_runtime": 85.0008, + "eval_seeclick_samples_per_second": 0.506, + "eval_seeclick_steps_per_second": 0.024, + "num_input_tokens_seen": 678756208, + "step": 7000 + }, + { + "epoch": 0.684395776300352, + "eval_icons_CIoU": 0.6606223881244659, + "eval_icons_GIoU": 0.6528872847557068, + "eval_icons_IoU": 0.6977867484092712, + "eval_icons_MAE_all": 0.06895309314131737, + "eval_icons_MAE_h": 0.08187616616487503, + "eval_icons_MAE_w": 0.057510972023010254, + "eval_icons_MAE_x": 0.056887414306402206, + "eval_icons_MAE_y": 0.07953784614801407, + "eval_icons_NUM_probability": 0.9999984800815582, + "eval_icons_inside_bbox": 0.7916666567325592, + "eval_icons_loss": 0.20745304226875305, + "eval_icons_loss_ce": 2.0051846831847797e-06, + "eval_icons_loss_iou": 0.30767822265625, + "eval_icons_loss_num": 0.04430580139160156, + "eval_icons_loss_xval": 0.221405029296875, + "eval_icons_runtime": 89.5164, + "eval_icons_samples_per_second": 0.559, + "eval_icons_steps_per_second": 0.022, + "num_input_tokens_seen": 678756208, + "step": 7000 + }, + { + "epoch": 0.684395776300352, + "eval_screenspot_CIoU": 0.3143955071767171, + "eval_screenspot_GIoU": 0.28828298548857373, + "eval_screenspot_IoU": 0.4090271095434825, + "eval_screenspot_MAE_all": 0.16851886610190073, + "eval_screenspot_MAE_h": 0.1344292163848877, + "eval_screenspot_MAE_w": 0.21440544227759042, + "eval_screenspot_MAE_x": 0.18941879024108252, + "eval_screenspot_MAE_y": 0.13582206020752588, + "eval_screenspot_NUM_probability": 0.9999932646751404, + "eval_screenspot_inside_bbox": 0.6241666674613953, + "eval_screenspot_loss": 0.5832937359809875, + "eval_screenspot_loss_ce": 0.023720768590768177, + "eval_screenspot_loss_iou": 0.3334554036458333, + "eval_screenspot_loss_num": 0.11315409342447917, + "eval_screenspot_loss_xval": 0.5658772786458334, + "eval_screenspot_runtime": 148.7067, + "eval_screenspot_samples_per_second": 0.598, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 678756208, + "step": 7000 + }, + { + "epoch": 0.684395776300352, + "eval_compot_CIoU": 0.4822112172842026, + "eval_compot_GIoU": 0.45921680331230164, + "eval_compot_IoU": 0.5404074490070343, + "eval_compot_MAE_all": 0.09223130345344543, + "eval_compot_MAE_h": 0.07354448735713959, + "eval_compot_MAE_w": 0.11231109127402306, + "eval_compot_MAE_x": 0.10650189220905304, + "eval_compot_MAE_y": 0.07656773924827576, + "eval_compot_NUM_probability": 0.9999699592590332, + "eval_compot_inside_bbox": 0.7361111044883728, + "eval_compot_loss": 0.2937431335449219, + "eval_compot_loss_ce": 0.017927990294992924, + "eval_compot_loss_iou": 0.447509765625, + "eval_compot_loss_num": 0.048641204833984375, + "eval_compot_loss_xval": 0.242950439453125, + "eval_compot_runtime": 97.0815, + "eval_compot_samples_per_second": 0.515, + "eval_compot_steps_per_second": 0.021, + "num_input_tokens_seen": 678756208, + "step": 7000 + }, + { + "epoch": 0.684395776300352, + "loss": 0.24423012137413025, + "loss_ce": 0.019315578043460846, + "loss_iou": 0.435546875, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 678756208, + "step": 7000 + }, + { + "epoch": 0.6844935471255378, + "grad_norm": 6.878308409827251, + "learning_rate": 5e-05, + "loss": 0.0772, + "num_input_tokens_seen": 678853112, + "step": 7001 + }, + { + "epoch": 0.6844935471255378, + "loss": 0.06067919731140137, + "loss_ce": 0.002665281295776367, + "loss_iou": 0.30859375, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 678853112, + "step": 7001 + }, + { + "epoch": 0.6845913179507235, + "grad_norm": 10.40745737334973, + "learning_rate": 5e-05, + "loss": 0.077, + "num_input_tokens_seen": 678949648, + "step": 7002 + }, + { + "epoch": 0.6845913179507235, + "loss": 0.07718458771705627, + "loss_ce": 0.0027064443565905094, + "loss_iou": 0.2734375, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 678949648, + "step": 7002 + }, + { + "epoch": 0.6846890887759093, + "grad_norm": 7.7322694108099945, + "learning_rate": 5e-05, + "loss": 0.0917, + "num_input_tokens_seen": 679047364, + "step": 7003 + }, + { + "epoch": 0.6846890887759093, + "loss": 0.0798041969537735, + "loss_ce": 0.005875363014638424, + "loss_iou": 0.294921875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 679047364, + "step": 7003 + }, + { + "epoch": 0.6847868596010951, + "grad_norm": 30.190840873186055, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 679144780, + "step": 7004 + }, + { + "epoch": 0.6847868596010951, + "loss": 0.08422932028770447, + "loss_ce": 0.006226387806236744, + "loss_iou": 0.33203125, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 679144780, + "step": 7004 + }, + { + "epoch": 0.6848846304262808, + "grad_norm": 28.30325709349653, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 679241236, + "step": 7005 + }, + { + "epoch": 0.6848846304262808, + "loss": 0.07240749895572662, + "loss_ce": 0.004627956077456474, + "loss_iou": 0.28125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 679241236, + "step": 7005 + }, + { + "epoch": 0.6849824012514666, + "grad_norm": 18.44020084233518, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 679337912, + "step": 7006 + }, + { + "epoch": 0.6849824012514666, + "loss": 0.0689011812210083, + "loss_ce": 0.003273129928857088, + "loss_iou": 0.251953125, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 679337912, + "step": 7006 + }, + { + "epoch": 0.6850801720766523, + "grad_norm": 13.236026262539381, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 679434468, + "step": 7007 + }, + { + "epoch": 0.6850801720766523, + "loss": 0.0696287676692009, + "loss_ce": 0.00664812047034502, + "loss_iou": 0.1875, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 679434468, + "step": 7007 + }, + { + "epoch": 0.6851779429018381, + "grad_norm": 5.472209937394559, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 679531288, + "step": 7008 + }, + { + "epoch": 0.6851779429018381, + "loss": 0.06980645656585693, + "loss_ce": 0.006265044678002596, + "loss_iou": 0.2158203125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 679531288, + "step": 7008 + }, + { + "epoch": 0.6852757137270239, + "grad_norm": 5.574079122134547, + "learning_rate": 5e-05, + "loss": 0.1056, + "num_input_tokens_seen": 679628460, + "step": 7009 + }, + { + "epoch": 0.6852757137270239, + "loss": 0.0983189195394516, + "loss_ce": 0.011015765368938446, + "loss_iou": 0.318359375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 679628460, + "step": 7009 + }, + { + "epoch": 0.6853734845522096, + "grad_norm": 4.555109199754848, + "learning_rate": 5e-05, + "loss": 0.0863, + "num_input_tokens_seen": 679725596, + "step": 7010 + }, + { + "epoch": 0.6853734845522096, + "loss": 0.090619295835495, + "loss_ce": 0.004895423538982868, + "loss_iou": 0.31640625, + "loss_num": 0.01708984375, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 679725596, + "step": 7010 + }, + { + "epoch": 0.6854712553773954, + "grad_norm": 7.909779808785015, + "learning_rate": 5e-05, + "loss": 0.0681, + "num_input_tokens_seen": 679823352, + "step": 7011 + }, + { + "epoch": 0.6854712553773954, + "loss": 0.06918482482433319, + "loss_ce": 0.0014510550536215305, + "loss_iou": 0.314453125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 679823352, + "step": 7011 + }, + { + "epoch": 0.6855690262025812, + "grad_norm": 23.385223458300004, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 679920220, + "step": 7012 + }, + { + "epoch": 0.6855690262025812, + "loss": 0.12050379067659378, + "loss_ce": 0.012471560388803482, + "loss_iou": 0.326171875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 679920220, + "step": 7012 + }, + { + "epoch": 0.6856667970277669, + "grad_norm": 28.63946160143656, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 680016080, + "step": 7013 + }, + { + "epoch": 0.6856667970277669, + "loss": 0.10061299800872803, + "loss_ce": 0.00625265296548605, + "loss_iou": 0.271484375, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 680016080, + "step": 7013 + }, + { + "epoch": 0.6857645678529527, + "grad_norm": 16.949497669354162, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 680113360, + "step": 7014 + }, + { + "epoch": 0.6857645678529527, + "loss": 0.05609689652919769, + "loss_ce": 0.005857335403561592, + "loss_iou": 0.30078125, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 680113360, + "step": 7014 + }, + { + "epoch": 0.6858623386781384, + "grad_norm": 3.45853565928746, + "learning_rate": 5e-05, + "loss": 0.1222, + "num_input_tokens_seen": 680210308, + "step": 7015 + }, + { + "epoch": 0.6858623386781384, + "loss": 0.12737758457660675, + "loss_ce": 0.005566672887653112, + "loss_iou": 0.287109375, + "loss_num": 0.0244140625, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 680210308, + "step": 7015 + }, + { + "epoch": 0.6859601095033242, + "grad_norm": 8.208052210515676, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 680307492, + "step": 7016 + }, + { + "epoch": 0.6859601095033242, + "loss": 0.06052296608686447, + "loss_ce": 0.0055608078837394714, + "loss_iou": 0.26953125, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 680307492, + "step": 7016 + }, + { + "epoch": 0.68605788032851, + "grad_norm": 3.735002426249861, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 680404252, + "step": 7017 + }, + { + "epoch": 0.68605788032851, + "loss": 0.10284718871116638, + "loss_ce": 0.004126636777073145, + "loss_iou": 0.275390625, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 680404252, + "step": 7017 + }, + { + "epoch": 0.6861556511536957, + "grad_norm": 10.940234583760997, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 680501212, + "step": 7018 + }, + { + "epoch": 0.6861556511536957, + "loss": 0.08067043125629425, + "loss_ce": 0.0038424241356551647, + "loss_iou": 0.275390625, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 680501212, + "step": 7018 + }, + { + "epoch": 0.6862534219788815, + "grad_norm": 25.85779394522213, + "learning_rate": 5e-05, + "loss": 0.0976, + "num_input_tokens_seen": 680598644, + "step": 7019 + }, + { + "epoch": 0.6862534219788815, + "loss": 0.10543306171894073, + "loss_ce": 0.0024514971300959587, + "loss_iou": 0.251953125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 680598644, + "step": 7019 + }, + { + "epoch": 0.6863511928040673, + "grad_norm": 3.4461773310396784, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 680694896, + "step": 7020 + }, + { + "epoch": 0.6863511928040673, + "loss": 0.08485114574432373, + "loss_ce": 0.009907601401209831, + "loss_iou": 0.26953125, + "loss_num": 0.0150146484375, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 680694896, + "step": 7020 + }, + { + "epoch": 0.686448963629253, + "grad_norm": 13.293506731185984, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 680791156, + "step": 7021 + }, + { + "epoch": 0.686448963629253, + "loss": 0.06447300314903259, + "loss_ce": 0.00506481621414423, + "loss_iou": 0.2099609375, + "loss_num": 0.01190185546875, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 680791156, + "step": 7021 + }, + { + "epoch": 0.6865467344544388, + "grad_norm": 2.8405702535501494, + "learning_rate": 5e-05, + "loss": 0.0523, + "num_input_tokens_seen": 680887236, + "step": 7022 + }, + { + "epoch": 0.6865467344544388, + "loss": 0.06655871868133545, + "loss_ce": 0.005828735884279013, + "loss_iou": 0.1962890625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 680887236, + "step": 7022 + }, + { + "epoch": 0.6866445052796245, + "grad_norm": 3.3219782092857946, + "learning_rate": 5e-05, + "loss": 0.0645, + "num_input_tokens_seen": 680984556, + "step": 7023 + }, + { + "epoch": 0.6866445052796245, + "loss": 0.06001029908657074, + "loss_ce": 0.004777297377586365, + "loss_iou": 0.2734375, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 680984556, + "step": 7023 + }, + { + "epoch": 0.6867422761048103, + "grad_norm": 6.129227989649448, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 681081436, + "step": 7024 + }, + { + "epoch": 0.6867422761048103, + "loss": 0.05153370648622513, + "loss_ce": 0.0019884202629327774, + "loss_iou": 0.330078125, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 681081436, + "step": 7024 + }, + { + "epoch": 0.6868400469299961, + "grad_norm": 12.648198650310679, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 681178192, + "step": 7025 + }, + { + "epoch": 0.6868400469299961, + "loss": 0.04415583983063698, + "loss_ce": 0.007214309647679329, + "loss_iou": 0.326171875, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 681178192, + "step": 7025 + }, + { + "epoch": 0.6869378177551818, + "grad_norm": 12.879946645379688, + "learning_rate": 5e-05, + "loss": 0.0954, + "num_input_tokens_seen": 681274964, + "step": 7026 + }, + { + "epoch": 0.6869378177551818, + "loss": 0.0651300847530365, + "loss_ce": 0.005254599265754223, + "loss_iou": 0.259765625, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 681274964, + "step": 7026 + }, + { + "epoch": 0.6870355885803676, + "grad_norm": 8.26632035059885, + "learning_rate": 5e-05, + "loss": 0.0861, + "num_input_tokens_seen": 681372744, + "step": 7027 + }, + { + "epoch": 0.6870355885803676, + "loss": 0.07587134838104248, + "loss_ce": 0.004971378017216921, + "loss_iou": 0.267578125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 681372744, + "step": 7027 + }, + { + "epoch": 0.6871333594055534, + "grad_norm": 30.3147105144009, + "learning_rate": 5e-05, + "loss": 0.0931, + "num_input_tokens_seen": 681469668, + "step": 7028 + }, + { + "epoch": 0.6871333594055534, + "loss": 0.08289848268032074, + "loss_ce": 0.00393424928188324, + "loss_iou": 0.3671875, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 681469668, + "step": 7028 + }, + { + "epoch": 0.6872311302307391, + "grad_norm": 4.943118664304248, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 681567996, + "step": 7029 + }, + { + "epoch": 0.6872311302307391, + "loss": 0.05067639797925949, + "loss_ce": 0.0070972987450659275, + "loss_iou": 0.359375, + "loss_num": 0.00872802734375, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 681567996, + "step": 7029 + }, + { + "epoch": 0.6873289010559249, + "grad_norm": 15.479750417316836, + "learning_rate": 5e-05, + "loss": 0.0896, + "num_input_tokens_seen": 681664608, + "step": 7030 + }, + { + "epoch": 0.6873289010559249, + "loss": 0.05754625052213669, + "loss_ce": 0.004293075762689114, + "loss_iou": 0.31640625, + "loss_num": 0.01068115234375, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 681664608, + "step": 7030 + }, + { + "epoch": 0.6874266718811107, + "grad_norm": 9.521668369999826, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 681761684, + "step": 7031 + }, + { + "epoch": 0.6874266718811107, + "loss": 0.04350034147500992, + "loss_ce": 0.0035375705920159817, + "loss_iou": 0.23828125, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 681761684, + "step": 7031 + }, + { + "epoch": 0.6875244427062964, + "grad_norm": 9.844051460285966, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 681857988, + "step": 7032 + }, + { + "epoch": 0.6875244427062964, + "loss": 0.059314288198947906, + "loss_ce": 0.004409347660839558, + "loss_iou": 0.31640625, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 681857988, + "step": 7032 + }, + { + "epoch": 0.6876222135314822, + "grad_norm": 14.889335033053182, + "learning_rate": 5e-05, + "loss": 0.0483, + "num_input_tokens_seen": 681954844, + "step": 7033 + }, + { + "epoch": 0.6876222135314822, + "loss": 0.04096296802163124, + "loss_ce": 0.0045020910911262035, + "loss_iou": 0.2001953125, + "loss_num": 0.007293701171875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 681954844, + "step": 7033 + }, + { + "epoch": 0.6877199843566679, + "grad_norm": 7.202350010742384, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 682051924, + "step": 7034 + }, + { + "epoch": 0.6877199843566679, + "loss": 0.09875556826591492, + "loss_ce": 0.011429517529904842, + "loss_iou": 0.349609375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 682051924, + "step": 7034 + }, + { + "epoch": 0.6878177551818537, + "grad_norm": 4.376890557625205, + "learning_rate": 5e-05, + "loss": 0.1323, + "num_input_tokens_seen": 682149288, + "step": 7035 + }, + { + "epoch": 0.6878177551818537, + "loss": 0.13454394042491913, + "loss_ce": 0.007499266415834427, + "loss_iou": 0.265625, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 682149288, + "step": 7035 + }, + { + "epoch": 0.6879155260070395, + "grad_norm": 4.663826209343482, + "learning_rate": 5e-05, + "loss": 0.0465, + "num_input_tokens_seen": 682246492, + "step": 7036 + }, + { + "epoch": 0.6879155260070395, + "loss": 0.05611379072070122, + "loss_ce": 0.004127095453441143, + "loss_iou": 0.310546875, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 682246492, + "step": 7036 + }, + { + "epoch": 0.6880132968322252, + "grad_norm": 7.075223169024005, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 682343624, + "step": 7037 + }, + { + "epoch": 0.6880132968322252, + "loss": 0.07088949531316757, + "loss_ce": 0.0029192205984145403, + "loss_iou": 0.279296875, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 682343624, + "step": 7037 + }, + { + "epoch": 0.688111067657411, + "grad_norm": 5.726818826161442, + "learning_rate": 5e-05, + "loss": 0.0498, + "num_input_tokens_seen": 682440424, + "step": 7038 + }, + { + "epoch": 0.688111067657411, + "loss": 0.04198905825614929, + "loss_ce": 0.009915081784129143, + "loss_iou": 0.3203125, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 682440424, + "step": 7038 + }, + { + "epoch": 0.6882088384825968, + "grad_norm": 6.907653187416667, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 682537624, + "step": 7039 + }, + { + "epoch": 0.6882088384825968, + "loss": 0.04059243202209473, + "loss_ce": 0.00762390810996294, + "loss_iou": 0.1943359375, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 682537624, + "step": 7039 + }, + { + "epoch": 0.6883066093077825, + "grad_norm": 8.013854441665105, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 682634656, + "step": 7040 + }, + { + "epoch": 0.6883066093077825, + "loss": 0.05143849179148674, + "loss_ce": 0.00447193905711174, + "loss_iou": 0.318359375, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 682634656, + "step": 7040 + }, + { + "epoch": 0.6884043801329683, + "grad_norm": 17.383132485114047, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 682730844, + "step": 7041 + }, + { + "epoch": 0.6884043801329683, + "loss": 0.07658740878105164, + "loss_ce": 0.006854741834104061, + "loss_iou": 0.2734375, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 682730844, + "step": 7041 + }, + { + "epoch": 0.688502150958154, + "grad_norm": 36.636664312357645, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 682827260, + "step": 7042 + }, + { + "epoch": 0.688502150958154, + "loss": 0.09783247113227844, + "loss_ce": 0.0055320486426353455, + "loss_iou": 0.2060546875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 682827260, + "step": 7042 + }, + { + "epoch": 0.6885999217833398, + "grad_norm": 6.5904104435914, + "learning_rate": 5e-05, + "loss": 0.1239, + "num_input_tokens_seen": 682924180, + "step": 7043 + }, + { + "epoch": 0.6885999217833398, + "loss": 0.13679952919483185, + "loss_ce": 0.009831148199737072, + "loss_iou": 0.3125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 682924180, + "step": 7043 + }, + { + "epoch": 0.6886976926085256, + "grad_norm": 11.106437187156986, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 683020172, + "step": 7044 + }, + { + "epoch": 0.6886976926085256, + "loss": 0.0926099419593811, + "loss_ce": 0.00993781816214323, + "loss_iou": 0.2890625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 683020172, + "step": 7044 + }, + { + "epoch": 0.6887954634337113, + "grad_norm": 11.74628286322769, + "learning_rate": 5e-05, + "loss": 0.0645, + "num_input_tokens_seen": 683116204, + "step": 7045 + }, + { + "epoch": 0.6887954634337113, + "loss": 0.07319559156894684, + "loss_ce": 0.001364841707982123, + "loss_iou": 0.1669921875, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 683116204, + "step": 7045 + }, + { + "epoch": 0.6888932342588971, + "grad_norm": 19.319467947297326, + "learning_rate": 5e-05, + "loss": 0.1139, + "num_input_tokens_seen": 683211908, + "step": 7046 + }, + { + "epoch": 0.6888932342588971, + "loss": 0.10074843466281891, + "loss_ce": 0.009821312502026558, + "loss_iou": 0.2216796875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 683211908, + "step": 7046 + }, + { + "epoch": 0.688991005084083, + "grad_norm": 5.2373957597336815, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 683308132, + "step": 7047 + }, + { + "epoch": 0.688991005084083, + "loss": 0.08403991162776947, + "loss_ce": 0.003854976734146476, + "loss_iou": 0.30859375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 683308132, + "step": 7047 + }, + { + "epoch": 0.6890887759092686, + "grad_norm": 5.791303412169745, + "learning_rate": 5e-05, + "loss": 0.129, + "num_input_tokens_seen": 683405088, + "step": 7048 + }, + { + "epoch": 0.6890887759092686, + "loss": 0.12109065055847168, + "loss_ce": 0.006237752735614777, + "loss_iou": 0.33984375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 683405088, + "step": 7048 + }, + { + "epoch": 0.6891865467344545, + "grad_norm": 2.881660550739186, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 683501924, + "step": 7049 + }, + { + "epoch": 0.6891865467344545, + "loss": 0.06796931475400925, + "loss_ce": 0.0019597969949245453, + "loss_iou": 0.2158203125, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 683501924, + "step": 7049 + }, + { + "epoch": 0.6892843175596403, + "grad_norm": 5.490943797806102, + "learning_rate": 5e-05, + "loss": 0.0912, + "num_input_tokens_seen": 683598932, + "step": 7050 + }, + { + "epoch": 0.6892843175596403, + "loss": 0.08002500981092453, + "loss_ce": 0.006637865211814642, + "loss_iou": 0.283203125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 683598932, + "step": 7050 + }, + { + "epoch": 0.689382088384826, + "grad_norm": 7.520521192390325, + "learning_rate": 5e-05, + "loss": 0.0971, + "num_input_tokens_seen": 683695808, + "step": 7051 + }, + { + "epoch": 0.689382088384826, + "loss": 0.10323132574558258, + "loss_ce": 0.009694954380393028, + "loss_iou": 0.26171875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 683695808, + "step": 7051 + }, + { + "epoch": 0.6894798592100118, + "grad_norm": 11.898103379833115, + "learning_rate": 5e-05, + "loss": 0.1114, + "num_input_tokens_seen": 683792616, + "step": 7052 + }, + { + "epoch": 0.6894798592100118, + "loss": 0.10633192956447601, + "loss_ce": 0.008401019498705864, + "loss_iou": 0.30078125, + "loss_num": 0.01953125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 683792616, + "step": 7052 + }, + { + "epoch": 0.6895776300351975, + "grad_norm": 25.071686967886734, + "learning_rate": 5e-05, + "loss": 0.0994, + "num_input_tokens_seen": 683891316, + "step": 7053 + }, + { + "epoch": 0.6895776300351975, + "loss": 0.0965082198381424, + "loss_ce": 0.007045933976769447, + "loss_iou": 0.333984375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 683891316, + "step": 7053 + }, + { + "epoch": 0.6896754008603833, + "grad_norm": 25.274470270467443, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 683988856, + "step": 7054 + }, + { + "epoch": 0.6896754008603833, + "loss": 0.10271795839071274, + "loss_ce": 0.0057636140845716, + "loss_iou": 0.34765625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 683988856, + "step": 7054 + }, + { + "epoch": 0.6897731716855691, + "grad_norm": 3.2100354960728046, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 684086008, + "step": 7055 + }, + { + "epoch": 0.6897731716855691, + "loss": 0.07550270110368729, + "loss_ce": 0.007005997933447361, + "loss_iou": 0.37890625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 684086008, + "step": 7055 + }, + { + "epoch": 0.6898709425107548, + "grad_norm": 4.336785075150425, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 684183428, + "step": 7056 + }, + { + "epoch": 0.6898709425107548, + "loss": 0.11848285794258118, + "loss_ce": 0.005674629472196102, + "loss_iou": 0.302734375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 684183428, + "step": 7056 + }, + { + "epoch": 0.6899687133359406, + "grad_norm": 19.426239348660143, + "learning_rate": 5e-05, + "loss": 0.1042, + "num_input_tokens_seen": 684280572, + "step": 7057 + }, + { + "epoch": 0.6899687133359406, + "loss": 0.09643705189228058, + "loss_ce": 0.0049949465319514275, + "loss_iou": 0.287109375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 684280572, + "step": 7057 + }, + { + "epoch": 0.6900664841611264, + "grad_norm": 20.37708566329861, + "learning_rate": 5e-05, + "loss": 0.1237, + "num_input_tokens_seen": 684377736, + "step": 7058 + }, + { + "epoch": 0.6900664841611264, + "loss": 0.1142260730266571, + "loss_ce": 0.0056140124797821045, + "loss_iou": 0.291015625, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 684377736, + "step": 7058 + }, + { + "epoch": 0.6901642549863121, + "grad_norm": 6.018133668579392, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 684474096, + "step": 7059 + }, + { + "epoch": 0.6901642549863121, + "loss": 0.08680742233991623, + "loss_ce": 0.006546198856085539, + "loss_iou": 0.2578125, + "loss_num": 0.01611328125, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 684474096, + "step": 7059 + }, + { + "epoch": 0.6902620258114979, + "grad_norm": 5.182189777635563, + "learning_rate": 5e-05, + "loss": 0.0503, + "num_input_tokens_seen": 684570260, + "step": 7060 + }, + { + "epoch": 0.6902620258114979, + "loss": 0.06441764533519745, + "loss_ce": 0.003374860854819417, + "loss_iou": 0.25390625, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 684570260, + "step": 7060 + }, + { + "epoch": 0.6903597966366836, + "grad_norm": 4.15000049809191, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 684667052, + "step": 7061 + }, + { + "epoch": 0.6903597966366836, + "loss": 0.041654571890830994, + "loss_ce": 0.00616262573748827, + "loss_iou": 0.2333984375, + "loss_num": 0.007110595703125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 684667052, + "step": 7061 + }, + { + "epoch": 0.6904575674618694, + "grad_norm": 7.57031978440828, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 684764252, + "step": 7062 + }, + { + "epoch": 0.6904575674618694, + "loss": 0.05302756279706955, + "loss_ce": 0.0037035243585705757, + "loss_iou": 0.2431640625, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 684764252, + "step": 7062 + }, + { + "epoch": 0.6905553382870552, + "grad_norm": 5.489372044912572, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 684860732, + "step": 7063 + }, + { + "epoch": 0.6905553382870552, + "loss": 0.13205741345882416, + "loss_ce": 0.010902633890509605, + "loss_iou": 0.265625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 684860732, + "step": 7063 + }, + { + "epoch": 0.6906531091122409, + "grad_norm": 4.977488398631395, + "learning_rate": 5e-05, + "loss": 0.0613, + "num_input_tokens_seen": 684957312, + "step": 7064 + }, + { + "epoch": 0.6906531091122409, + "loss": 0.03762170672416687, + "loss_ce": 0.0063106706365942955, + "loss_iou": 0.2265625, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 684957312, + "step": 7064 + }, + { + "epoch": 0.6907508799374267, + "grad_norm": 29.464970383181388, + "learning_rate": 5e-05, + "loss": 0.0821, + "num_input_tokens_seen": 685054408, + "step": 7065 + }, + { + "epoch": 0.6907508799374267, + "loss": 0.05656290054321289, + "loss_ce": 0.004889007657766342, + "loss_iou": 0.306640625, + "loss_num": 0.01031494140625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 685054408, + "step": 7065 + }, + { + "epoch": 0.6908486507626125, + "grad_norm": 3.415113562379435, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 685150812, + "step": 7066 + }, + { + "epoch": 0.6908486507626125, + "loss": 0.07120530307292938, + "loss_ce": 0.005302592180669308, + "loss_iou": 0.2275390625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 685150812, + "step": 7066 + }, + { + "epoch": 0.6909464215877982, + "grad_norm": 5.025738284506379, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 685248560, + "step": 7067 + }, + { + "epoch": 0.6909464215877982, + "loss": 0.07528099417686462, + "loss_ce": 0.006219713017344475, + "loss_iou": 0.28515625, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 685248560, + "step": 7067 + }, + { + "epoch": 0.691044192412984, + "grad_norm": 3.391196900823935, + "learning_rate": 5e-05, + "loss": 0.0536, + "num_input_tokens_seen": 685345096, + "step": 7068 + }, + { + "epoch": 0.691044192412984, + "loss": 0.04026813432574272, + "loss_ce": 0.004631233401596546, + "loss_iou": 0.267578125, + "loss_num": 0.007110595703125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 685345096, + "step": 7068 + }, + { + "epoch": 0.6911419632381697, + "grad_norm": 55.65765592641848, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 685441300, + "step": 7069 + }, + { + "epoch": 0.6911419632381697, + "loss": 0.06377454102039337, + "loss_ce": 0.005875064060091972, + "loss_iou": 0.1513671875, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 685441300, + "step": 7069 + }, + { + "epoch": 0.6912397340633555, + "grad_norm": 2.181814057596097, + "learning_rate": 5e-05, + "loss": 0.0506, + "num_input_tokens_seen": 685537976, + "step": 7070 + }, + { + "epoch": 0.6912397340633555, + "loss": 0.04578762874007225, + "loss_ce": 0.0046117836609482765, + "loss_iou": 0.3046875, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 685537976, + "step": 7070 + }, + { + "epoch": 0.6913375048885413, + "grad_norm": 4.641427204854626, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 685635168, + "step": 7071 + }, + { + "epoch": 0.6913375048885413, + "loss": 0.09638907760381699, + "loss_ce": 0.003249431261792779, + "loss_iou": 0.28125, + "loss_num": 0.0185546875, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 685635168, + "step": 7071 + }, + { + "epoch": 0.691435275713727, + "grad_norm": 6.285375085303947, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 685732232, + "step": 7072 + }, + { + "epoch": 0.691435275713727, + "loss": 0.09371989220380783, + "loss_ce": 0.005646164529025555, + "loss_iou": 0.2890625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 685732232, + "step": 7072 + }, + { + "epoch": 0.6915330465389128, + "grad_norm": 8.033268290517237, + "learning_rate": 5e-05, + "loss": 0.0503, + "num_input_tokens_seen": 685829192, + "step": 7073 + }, + { + "epoch": 0.6915330465389128, + "loss": 0.04076221585273743, + "loss_ce": 0.0023405831307172775, + "loss_iou": 0.353515625, + "loss_num": 0.0076904296875, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 685829192, + "step": 7073 + }, + { + "epoch": 0.6916308173640986, + "grad_norm": 16.623347074038772, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 685925744, + "step": 7074 + }, + { + "epoch": 0.6916308173640986, + "loss": 0.09105797111988068, + "loss_ce": 0.008782580494880676, + "loss_iou": 0.34765625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 685925744, + "step": 7074 + }, + { + "epoch": 0.6917285881892843, + "grad_norm": 11.509864025732943, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 686022152, + "step": 7075 + }, + { + "epoch": 0.6917285881892843, + "loss": 0.0831124484539032, + "loss_ce": 0.006452294532209635, + "loss_iou": 0.34375, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 686022152, + "step": 7075 + }, + { + "epoch": 0.6918263590144701, + "grad_norm": 9.99483802459264, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 686118360, + "step": 7076 + }, + { + "epoch": 0.6918263590144701, + "loss": 0.05095959082245827, + "loss_ce": 0.008429530076682568, + "loss_iou": 0.1884765625, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 686118360, + "step": 7076 + }, + { + "epoch": 0.6919241298396559, + "grad_norm": 7.382465269199124, + "learning_rate": 5e-05, + "loss": 0.0899, + "num_input_tokens_seen": 686216688, + "step": 7077 + }, + { + "epoch": 0.6919241298396559, + "loss": 0.10677137970924377, + "loss_ce": 0.0035914487671107054, + "loss_iou": 0.318359375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 686216688, + "step": 7077 + }, + { + "epoch": 0.6920219006648416, + "grad_norm": 5.3846498273265695, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 686313884, + "step": 7078 + }, + { + "epoch": 0.6920219006648416, + "loss": 0.07637910544872284, + "loss_ce": 0.005395222920924425, + "loss_iou": 0.3515625, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 686313884, + "step": 7078 + }, + { + "epoch": 0.6921196714900274, + "grad_norm": 4.710224953512527, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 686410724, + "step": 7079 + }, + { + "epoch": 0.6921196714900274, + "loss": 0.05908159911632538, + "loss_ce": 0.005828428082168102, + "loss_iou": 0.26953125, + "loss_num": 0.01068115234375, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 686410724, + "step": 7079 + }, + { + "epoch": 0.6922174423152131, + "grad_norm": 5.386520690700106, + "learning_rate": 5e-05, + "loss": 0.0977, + "num_input_tokens_seen": 686507892, + "step": 7080 + }, + { + "epoch": 0.6922174423152131, + "loss": 0.10766993463039398, + "loss_ce": 0.01092921756207943, + "loss_iou": 0.265625, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 686507892, + "step": 7080 + }, + { + "epoch": 0.6923152131403989, + "grad_norm": 10.417470709618046, + "learning_rate": 5e-05, + "loss": 0.0932, + "num_input_tokens_seen": 686604160, + "step": 7081 + }, + { + "epoch": 0.6923152131403989, + "loss": 0.12684190273284912, + "loss_ce": 0.006099096965044737, + "loss_iou": 0.3125, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 686604160, + "step": 7081 + }, + { + "epoch": 0.6924129839655847, + "grad_norm": 3.977538504380688, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 686701828, + "step": 7082 + }, + { + "epoch": 0.6924129839655847, + "loss": 0.07753069698810577, + "loss_ce": 0.0024116772692650557, + "loss_iou": 0.26171875, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 686701828, + "step": 7082 + }, + { + "epoch": 0.6925107547907704, + "grad_norm": 4.467959583881375, + "learning_rate": 5e-05, + "loss": 0.0547, + "num_input_tokens_seen": 686799676, + "step": 7083 + }, + { + "epoch": 0.6925107547907704, + "loss": 0.07703453302383423, + "loss_ce": 0.0042806328274309635, + "loss_iou": 0.361328125, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 686799676, + "step": 7083 + }, + { + "epoch": 0.6926085256159562, + "grad_norm": 7.6034031520894745, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 686896420, + "step": 7084 + }, + { + "epoch": 0.6926085256159562, + "loss": 0.06012917682528496, + "loss_ce": 0.008035669103264809, + "loss_iou": 0.21484375, + "loss_num": 0.01043701171875, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 686896420, + "step": 7084 + }, + { + "epoch": 0.692706296441142, + "grad_norm": 5.137188614429156, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 686993564, + "step": 7085 + }, + { + "epoch": 0.692706296441142, + "loss": 0.07134392857551575, + "loss_ce": 0.007241757120937109, + "loss_iou": 0.263671875, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 686993564, + "step": 7085 + }, + { + "epoch": 0.6928040672663277, + "grad_norm": 14.776277240930314, + "learning_rate": 5e-05, + "loss": 0.0925, + "num_input_tokens_seen": 687089604, + "step": 7086 + }, + { + "epoch": 0.6928040672663277, + "loss": 0.07828991115093231, + "loss_ce": 0.004334369208663702, + "loss_iou": 0.1953125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 687089604, + "step": 7086 + }, + { + "epoch": 0.6929018380915135, + "grad_norm": 13.717460054719595, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 687186608, + "step": 7087 + }, + { + "epoch": 0.6929018380915135, + "loss": 0.045281294733285904, + "loss_ce": 0.0033730310387909412, + "loss_iou": 0.228515625, + "loss_num": 0.00836181640625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 687186608, + "step": 7087 + }, + { + "epoch": 0.6929996089166992, + "grad_norm": 18.914764189912322, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 687283468, + "step": 7088 + }, + { + "epoch": 0.6929996089166992, + "loss": 0.12210585922002792, + "loss_ce": 0.007504724897444248, + "loss_iou": 0.37890625, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 687283468, + "step": 7088 + }, + { + "epoch": 0.693097379741885, + "grad_norm": 19.380503888058232, + "learning_rate": 5e-05, + "loss": 0.1033, + "num_input_tokens_seen": 687381048, + "step": 7089 + }, + { + "epoch": 0.693097379741885, + "loss": 0.08259306848049164, + "loss_ce": 0.002850631717592478, + "loss_iou": 0.283203125, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 687381048, + "step": 7089 + }, + { + "epoch": 0.6931951505670708, + "grad_norm": 2.324049654520148, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 687477848, + "step": 7090 + }, + { + "epoch": 0.6931951505670708, + "loss": 0.07426480948925018, + "loss_ce": 0.004936506971716881, + "loss_iou": 0.314453125, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 687477848, + "step": 7090 + }, + { + "epoch": 0.6932929213922565, + "grad_norm": 9.588695015786392, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 687575148, + "step": 7091 + }, + { + "epoch": 0.6932929213922565, + "loss": 0.06368847191333771, + "loss_ce": 0.0017377894837409258, + "loss_iou": 0.298828125, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 687575148, + "step": 7091 + }, + { + "epoch": 0.6933906922174423, + "grad_norm": 8.846385777153854, + "learning_rate": 5e-05, + "loss": 0.0466, + "num_input_tokens_seen": 687671780, + "step": 7092 + }, + { + "epoch": 0.6933906922174423, + "loss": 0.04904329776763916, + "loss_ce": 0.005525233224034309, + "loss_iou": 0.177734375, + "loss_num": 0.00872802734375, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 687671780, + "step": 7092 + }, + { + "epoch": 0.6934884630426281, + "grad_norm": 10.002140709207072, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 687770380, + "step": 7093 + }, + { + "epoch": 0.6934884630426281, + "loss": 0.07273907959461212, + "loss_ce": 0.002777526620775461, + "loss_iou": 0.306640625, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 687770380, + "step": 7093 + }, + { + "epoch": 0.6935862338678138, + "grad_norm": 19.038804104912856, + "learning_rate": 5e-05, + "loss": 0.0587, + "num_input_tokens_seen": 687866828, + "step": 7094 + }, + { + "epoch": 0.6935862338678138, + "loss": 0.05985606461763382, + "loss_ce": 0.004756578244268894, + "loss_iou": 0.3203125, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 687866828, + "step": 7094 + }, + { + "epoch": 0.6936840046929996, + "grad_norm": 12.366526159149625, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 687962984, + "step": 7095 + }, + { + "epoch": 0.6936840046929996, + "loss": 0.07577158510684967, + "loss_ce": 0.011707558296620846, + "loss_iou": 0.26953125, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 687962984, + "step": 7095 + }, + { + "epoch": 0.6937817755181854, + "grad_norm": 7.191468876318007, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 688059744, + "step": 7096 + }, + { + "epoch": 0.6937817755181854, + "loss": 0.07139663398265839, + "loss_ce": 0.007935332134366035, + "loss_iou": 0.296875, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 688059744, + "step": 7096 + }, + { + "epoch": 0.6938795463433711, + "grad_norm": 2.3584047669412036, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 688157276, + "step": 7097 + }, + { + "epoch": 0.6938795463433711, + "loss": 0.07626551389694214, + "loss_ce": 0.006212411914020777, + "loss_iou": 0.29296875, + "loss_num": 0.0140380859375, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 688157276, + "step": 7097 + }, + { + "epoch": 0.6939773171685569, + "grad_norm": 9.22425625429853, + "learning_rate": 5e-05, + "loss": 0.0913, + "num_input_tokens_seen": 688254376, + "step": 7098 + }, + { + "epoch": 0.6939773171685569, + "loss": 0.07547101378440857, + "loss_ce": 0.00608930503949523, + "loss_iou": 0.30078125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 688254376, + "step": 7098 + }, + { + "epoch": 0.6940750879937426, + "grad_norm": 32.25761139973602, + "learning_rate": 5e-05, + "loss": 0.0745, + "num_input_tokens_seen": 688352256, + "step": 7099 + }, + { + "epoch": 0.6940750879937426, + "loss": 0.07138945162296295, + "loss_ce": 0.001473684678785503, + "loss_iou": 0.341796875, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 688352256, + "step": 7099 + }, + { + "epoch": 0.6941728588189284, + "grad_norm": 12.958665552329233, + "learning_rate": 5e-05, + "loss": 0.1047, + "num_input_tokens_seen": 688449776, + "step": 7100 + }, + { + "epoch": 0.6941728588189284, + "loss": 0.107151098549366, + "loss_ce": 0.00500876922160387, + "loss_iou": 0.28125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 688449776, + "step": 7100 + }, + { + "epoch": 0.6942706296441142, + "grad_norm": 11.769368889596992, + "learning_rate": 5e-05, + "loss": 0.0812, + "num_input_tokens_seen": 688546596, + "step": 7101 + }, + { + "epoch": 0.6942706296441142, + "loss": 0.09169510006904602, + "loss_ce": 0.00999954529106617, + "loss_iou": 0.283203125, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 688546596, + "step": 7101 + }, + { + "epoch": 0.6943684004692999, + "grad_norm": 30.75121314763884, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 688644284, + "step": 7102 + }, + { + "epoch": 0.6943684004692999, + "loss": 0.09701509773731232, + "loss_ce": 0.006728848442435265, + "loss_iou": 0.291015625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 688644284, + "step": 7102 + }, + { + "epoch": 0.6944661712944857, + "grad_norm": 17.232941328578594, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 688740332, + "step": 7103 + }, + { + "epoch": 0.6944661712944857, + "loss": 0.06335220485925674, + "loss_ce": 0.004869083873927593, + "loss_iou": 0.228515625, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 688740332, + "step": 7103 + }, + { + "epoch": 0.6945639421196715, + "grad_norm": 22.783994499415794, + "learning_rate": 5e-05, + "loss": 0.0541, + "num_input_tokens_seen": 688836840, + "step": 7104 + }, + { + "epoch": 0.6945639421196715, + "loss": 0.06489028036594391, + "loss_ce": 0.004671466536819935, + "loss_iou": 0.2490234375, + "loss_num": 0.01202392578125, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 688836840, + "step": 7104 + }, + { + "epoch": 0.6946617129448572, + "grad_norm": 14.794420567643702, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 688934276, + "step": 7105 + }, + { + "epoch": 0.6946617129448572, + "loss": 0.06847205758094788, + "loss_ce": 0.004629289265722036, + "loss_iou": 0.392578125, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 688934276, + "step": 7105 + }, + { + "epoch": 0.694759483770043, + "grad_norm": 22.701216430632886, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 689031116, + "step": 7106 + }, + { + "epoch": 0.694759483770043, + "loss": 0.05880718305706978, + "loss_ce": 0.0053422944620251656, + "loss_iou": 0.2578125, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 689031116, + "step": 7106 + }, + { + "epoch": 0.6948572545952287, + "grad_norm": 7.085316242091148, + "learning_rate": 5e-05, + "loss": 0.0626, + "num_input_tokens_seen": 689127388, + "step": 7107 + }, + { + "epoch": 0.6948572545952287, + "loss": 0.0765528678894043, + "loss_ce": 0.008468150161206722, + "loss_iou": 0.365234375, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 689127388, + "step": 7107 + }, + { + "epoch": 0.6949550254204145, + "grad_norm": 7.650042100193598, + "learning_rate": 5e-05, + "loss": 0.0881, + "num_input_tokens_seen": 689225376, + "step": 7108 + }, + { + "epoch": 0.6949550254204145, + "loss": 0.07965631783008575, + "loss_ce": 0.014486034400761127, + "loss_iou": 0.271484375, + "loss_num": 0.0130615234375, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 689225376, + "step": 7108 + }, + { + "epoch": 0.6950527962456003, + "grad_norm": 20.66695789744925, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 689323324, + "step": 7109 + }, + { + "epoch": 0.6950527962456003, + "loss": 0.08798475563526154, + "loss_ce": 0.007853222079575062, + "loss_iou": 0.333984375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 689323324, + "step": 7109 + }, + { + "epoch": 0.695150567070786, + "grad_norm": 3.7443228289962445, + "learning_rate": 5e-05, + "loss": 0.0675, + "num_input_tokens_seen": 689420884, + "step": 7110 + }, + { + "epoch": 0.695150567070786, + "loss": 0.06458692252635956, + "loss_ce": 0.0022547664120793343, + "loss_iou": 0.37109375, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 689420884, + "step": 7110 + }, + { + "epoch": 0.6952483378959718, + "grad_norm": 6.889499670468244, + "learning_rate": 5e-05, + "loss": 0.0585, + "num_input_tokens_seen": 689517476, + "step": 7111 + }, + { + "epoch": 0.6952483378959718, + "loss": 0.051609106361866, + "loss_ce": 0.006160799879580736, + "loss_iou": 0.28515625, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 689517476, + "step": 7111 + }, + { + "epoch": 0.6953461087211577, + "grad_norm": 3.7970093640464473, + "learning_rate": 5e-05, + "loss": 0.0613, + "num_input_tokens_seen": 689614940, + "step": 7112 + }, + { + "epoch": 0.6953461087211577, + "loss": 0.04862932860851288, + "loss_ce": 0.0039287046529352665, + "loss_iou": 0.296875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 689614940, + "step": 7112 + }, + { + "epoch": 0.6954438795463433, + "grad_norm": 5.31095232752519, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 689711920, + "step": 7113 + }, + { + "epoch": 0.6954438795463433, + "loss": 0.08537618815898895, + "loss_ce": 0.005652833729982376, + "loss_iou": 0.1298828125, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 689711920, + "step": 7113 + }, + { + "epoch": 0.6955416503715292, + "grad_norm": 8.926148199077451, + "learning_rate": 5e-05, + "loss": 0.1127, + "num_input_tokens_seen": 689808284, + "step": 7114 + }, + { + "epoch": 0.6955416503715292, + "loss": 0.1252719610929489, + "loss_ce": 0.011136227287352085, + "loss_iou": 0.298828125, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 689808284, + "step": 7114 + }, + { + "epoch": 0.6956394211967148, + "grad_norm": 4.214664923564924, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 689904276, + "step": 7115 + }, + { + "epoch": 0.6956394211967148, + "loss": 0.07535725831985474, + "loss_ce": 0.009378260932862759, + "loss_iou": 0.244140625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 689904276, + "step": 7115 + }, + { + "epoch": 0.6957371920219007, + "grad_norm": 14.331596520169622, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 690001172, + "step": 7116 + }, + { + "epoch": 0.6957371920219007, + "loss": 0.0868968740105629, + "loss_ce": 0.004434565547853708, + "loss_iou": 0.248046875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 690001172, + "step": 7116 + }, + { + "epoch": 0.6958349628470865, + "grad_norm": 5.982503982599565, + "learning_rate": 5e-05, + "loss": 0.0504, + "num_input_tokens_seen": 690096908, + "step": 7117 + }, + { + "epoch": 0.6958349628470865, + "loss": 0.0722009465098381, + "loss_ce": 0.006221940740942955, + "loss_iou": 0.275390625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 690096908, + "step": 7117 + }, + { + "epoch": 0.6959327336722722, + "grad_norm": 8.003593706917625, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 690194160, + "step": 7118 + }, + { + "epoch": 0.6959327336722722, + "loss": 0.04961790144443512, + "loss_ce": 0.0029717832803726196, + "loss_iou": 0.314453125, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 690194160, + "step": 7118 + }, + { + "epoch": 0.696030504497458, + "grad_norm": 8.584150423071767, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 690290692, + "step": 7119 + }, + { + "epoch": 0.696030504497458, + "loss": 0.07586206495761871, + "loss_ce": 0.006903787609189749, + "loss_iou": 0.23828125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 690290692, + "step": 7119 + }, + { + "epoch": 0.6961282753226438, + "grad_norm": 14.515623599229007, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 690388140, + "step": 7120 + }, + { + "epoch": 0.6961282753226438, + "loss": 0.05800337344408035, + "loss_ce": 0.0036515682004392147, + "loss_iou": 0.375, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 690388140, + "step": 7120 + }, + { + "epoch": 0.6962260461478295, + "grad_norm": 4.414449639762076, + "learning_rate": 5e-05, + "loss": 0.1157, + "num_input_tokens_seen": 690485104, + "step": 7121 + }, + { + "epoch": 0.6962260461478295, + "loss": 0.12113640457391739, + "loss_ce": 0.004421930760145187, + "loss_iou": 0.34375, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 690485104, + "step": 7121 + }, + { + "epoch": 0.6963238169730153, + "grad_norm": 5.476908461372446, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 690582216, + "step": 7122 + }, + { + "epoch": 0.6963238169730153, + "loss": 0.06651097536087036, + "loss_ce": 0.0058191390708088875, + "loss_iou": 0.27734375, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 690582216, + "step": 7122 + }, + { + "epoch": 0.6964215877982011, + "grad_norm": 5.861968616579425, + "learning_rate": 5e-05, + "loss": 0.0714, + "num_input_tokens_seen": 690678308, + "step": 7123 + }, + { + "epoch": 0.6964215877982011, + "loss": 0.09570331871509552, + "loss_ce": 0.0037996256723999977, + "loss_iou": 0.291015625, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 690678308, + "step": 7123 + }, + { + "epoch": 0.6965193586233868, + "grad_norm": 7.520138710756818, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 690775560, + "step": 7124 + }, + { + "epoch": 0.6965193586233868, + "loss": 0.0769713744521141, + "loss_ce": 0.003492676420137286, + "loss_iou": 0.251953125, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 690775560, + "step": 7124 + }, + { + "epoch": 0.6966171294485726, + "grad_norm": 6.4308335299176616, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 690872268, + "step": 7125 + }, + { + "epoch": 0.6966171294485726, + "loss": 0.05748613178730011, + "loss_ce": 0.004927231930196285, + "loss_iou": 0.349609375, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 690872268, + "step": 7125 + }, + { + "epoch": 0.6967149002737583, + "grad_norm": 13.973627810031475, + "learning_rate": 5e-05, + "loss": 0.0931, + "num_input_tokens_seen": 690970420, + "step": 7126 + }, + { + "epoch": 0.6967149002737583, + "loss": 0.08658338338136673, + "loss_ce": 0.004155409522354603, + "loss_iou": 0.30078125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 690970420, + "step": 7126 + }, + { + "epoch": 0.6968126710989441, + "grad_norm": 8.255693448978622, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 691067176, + "step": 7127 + }, + { + "epoch": 0.6968126710989441, + "loss": 0.08777560293674469, + "loss_ce": 0.004157434217631817, + "loss_iou": 0.3125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 691067176, + "step": 7127 + }, + { + "epoch": 0.6969104419241299, + "grad_norm": 9.470134015702486, + "learning_rate": 5e-05, + "loss": 0.1076, + "num_input_tokens_seen": 691163944, + "step": 7128 + }, + { + "epoch": 0.6969104419241299, + "loss": 0.06810168921947479, + "loss_ce": 0.004335205070674419, + "loss_iou": 0.341796875, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 691163944, + "step": 7128 + }, + { + "epoch": 0.6970082127493156, + "grad_norm": 14.463344391850956, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 691261152, + "step": 7129 + }, + { + "epoch": 0.6970082127493156, + "loss": 0.056931111961603165, + "loss_ce": 0.005844687111675739, + "loss_iou": 0.310546875, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 691261152, + "step": 7129 + }, + { + "epoch": 0.6971059835745014, + "grad_norm": 20.784261380625818, + "learning_rate": 5e-05, + "loss": 0.0524, + "num_input_tokens_seen": 691357628, + "step": 7130 + }, + { + "epoch": 0.6971059835745014, + "loss": 0.07024449110031128, + "loss_ce": 0.008721056394279003, + "loss_iou": 0.23046875, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 691357628, + "step": 7130 + }, + { + "epoch": 0.6972037543996872, + "grad_norm": 5.715881704733726, + "learning_rate": 5e-05, + "loss": 0.0551, + "num_input_tokens_seen": 691454324, + "step": 7131 + }, + { + "epoch": 0.6972037543996872, + "loss": 0.062147077172994614, + "loss_ce": 0.0037059157621115446, + "loss_iou": 0.302734375, + "loss_num": 0.01171875, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 691454324, + "step": 7131 + }, + { + "epoch": 0.6973015252248729, + "grad_norm": 3.9138447984783817, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 691551536, + "step": 7132 + }, + { + "epoch": 0.6973015252248729, + "loss": 0.07246153056621552, + "loss_ce": 0.004590431693941355, + "loss_iou": 0.25, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 691551536, + "step": 7132 + }, + { + "epoch": 0.6973992960500587, + "grad_norm": 11.734552211427538, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 691648748, + "step": 7133 + }, + { + "epoch": 0.6973992960500587, + "loss": 0.06592466682195663, + "loss_ce": 0.005316757597029209, + "loss_iou": 0.37890625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 691648748, + "step": 7133 + }, + { + "epoch": 0.6974970668752444, + "grad_norm": 7.116855215069031, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 691746420, + "step": 7134 + }, + { + "epoch": 0.6974970668752444, + "loss": 0.09358170628547668, + "loss_ce": 0.007735758554190397, + "loss_iou": 0.3515625, + "loss_num": 0.01708984375, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 691746420, + "step": 7134 + }, + { + "epoch": 0.6975948377004302, + "grad_norm": 4.021253059556674, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 691843036, + "step": 7135 + }, + { + "epoch": 0.6975948377004302, + "loss": 0.06614009290933609, + "loss_ce": 0.0024651638232171535, + "loss_iou": 0.2890625, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 691843036, + "step": 7135 + }, + { + "epoch": 0.697692608525616, + "grad_norm": 27.91372447408355, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 691939112, + "step": 7136 + }, + { + "epoch": 0.697692608525616, + "loss": 0.05875764042139053, + "loss_ce": 0.00391754973679781, + "loss_iou": 0.23828125, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 691939112, + "step": 7136 + }, + { + "epoch": 0.6977903793508017, + "grad_norm": 7.376932298907762, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 692035984, + "step": 7137 + }, + { + "epoch": 0.6977903793508017, + "loss": 0.0863945260643959, + "loss_ce": 0.007720206864178181, + "loss_iou": 0.369140625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 692035984, + "step": 7137 + }, + { + "epoch": 0.6978881501759875, + "grad_norm": 9.322056086287812, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 692132484, + "step": 7138 + }, + { + "epoch": 0.6978881501759875, + "loss": 0.051820628345012665, + "loss_ce": 0.002626292873173952, + "loss_iou": 0.283203125, + "loss_num": 0.00982666015625, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 692132484, + "step": 7138 + }, + { + "epoch": 0.6979859210011733, + "grad_norm": 10.758031099199467, + "learning_rate": 5e-05, + "loss": 0.0529, + "num_input_tokens_seen": 692230456, + "step": 7139 + }, + { + "epoch": 0.6979859210011733, + "loss": 0.06493273377418518, + "loss_ce": 0.004752074833959341, + "loss_iou": 0.296875, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 692230456, + "step": 7139 + }, + { + "epoch": 0.698083691826359, + "grad_norm": 11.656106018706216, + "learning_rate": 5e-05, + "loss": 0.0546, + "num_input_tokens_seen": 692327628, + "step": 7140 + }, + { + "epoch": 0.698083691826359, + "loss": 0.07160896062850952, + "loss_ce": 0.00825447216629982, + "loss_iou": 0.267578125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 692327628, + "step": 7140 + }, + { + "epoch": 0.6981814626515448, + "grad_norm": 8.27869389592091, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 692423696, + "step": 7141 + }, + { + "epoch": 0.6981814626515448, + "loss": 0.0753842443227768, + "loss_ce": 0.0033703879453241825, + "loss_iou": 0.201171875, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 692423696, + "step": 7141 + }, + { + "epoch": 0.6982792334767306, + "grad_norm": 4.340496248447207, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 692519800, + "step": 7142 + }, + { + "epoch": 0.6982792334767306, + "loss": 0.07914622128009796, + "loss_ce": 0.0061596161685884, + "loss_iou": 0.1767578125, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 692519800, + "step": 7142 + }, + { + "epoch": 0.6983770043019163, + "grad_norm": 8.930269149684856, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 692616796, + "step": 7143 + }, + { + "epoch": 0.6983770043019163, + "loss": 0.0828142985701561, + "loss_ce": 0.0043192715384066105, + "loss_iou": 0.2265625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 692616796, + "step": 7143 + }, + { + "epoch": 0.6984747751271021, + "grad_norm": 8.161954611024148, + "learning_rate": 5e-05, + "loss": 0.0865, + "num_input_tokens_seen": 692713736, + "step": 7144 + }, + { + "epoch": 0.6984747751271021, + "loss": 0.09580469131469727, + "loss_ce": 0.006128787994384766, + "loss_iou": 0.26171875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 692713736, + "step": 7144 + }, + { + "epoch": 0.6985725459522878, + "grad_norm": 11.84476694991077, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 692810416, + "step": 7145 + }, + { + "epoch": 0.6985725459522878, + "loss": 0.034488264471292496, + "loss_ce": 0.004100385122001171, + "loss_iou": 0.1923828125, + "loss_num": 0.006072998046875, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 692810416, + "step": 7145 + }, + { + "epoch": 0.6986703167774736, + "grad_norm": 9.213019407855144, + "learning_rate": 5e-05, + "loss": 0.0528, + "num_input_tokens_seen": 692907580, + "step": 7146 + }, + { + "epoch": 0.6986703167774736, + "loss": 0.05376225709915161, + "loss_ce": 0.002736867405474186, + "loss_iou": 0.2734375, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 692907580, + "step": 7146 + }, + { + "epoch": 0.6987680876026594, + "grad_norm": 6.058128973783294, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 693005480, + "step": 7147 + }, + { + "epoch": 0.6987680876026594, + "loss": 0.08347329497337341, + "loss_ce": 0.002586448099464178, + "loss_iou": 0.3359375, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 693005480, + "step": 7147 + }, + { + "epoch": 0.6988658584278451, + "grad_norm": 13.867286149948256, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 693101708, + "step": 7148 + }, + { + "epoch": 0.6988658584278451, + "loss": 0.061828725039958954, + "loss_ce": 0.005241504870355129, + "loss_iou": 0.291015625, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 693101708, + "step": 7148 + }, + { + "epoch": 0.6989636292530309, + "grad_norm": 16.172803853520357, + "learning_rate": 5e-05, + "loss": 0.0526, + "num_input_tokens_seen": 693199584, + "step": 7149 + }, + { + "epoch": 0.6989636292530309, + "loss": 0.04962260276079178, + "loss_ce": 0.005143230315297842, + "loss_iou": 0.318359375, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 693199584, + "step": 7149 + }, + { + "epoch": 0.6990614000782167, + "grad_norm": 45.4220506988101, + "learning_rate": 5e-05, + "loss": 0.0952, + "num_input_tokens_seen": 693296972, + "step": 7150 + }, + { + "epoch": 0.6990614000782167, + "loss": 0.08949784934520721, + "loss_ce": 0.004994679242372513, + "loss_iou": 0.2890625, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 693296972, + "step": 7150 + }, + { + "epoch": 0.6991591709034024, + "grad_norm": 37.54024911370374, + "learning_rate": 5e-05, + "loss": 0.1101, + "num_input_tokens_seen": 693394608, + "step": 7151 + }, + { + "epoch": 0.6991591709034024, + "loss": 0.11868730187416077, + "loss_ce": 0.009495404548943043, + "loss_iou": 0.2578125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 693394608, + "step": 7151 + }, + { + "epoch": 0.6992569417285882, + "grad_norm": 5.088638089216045, + "learning_rate": 5e-05, + "loss": 0.1076, + "num_input_tokens_seen": 693491896, + "step": 7152 + }, + { + "epoch": 0.6992569417285882, + "loss": 0.14494724571704865, + "loss_ce": 0.007572373375296593, + "loss_iou": 0.21875, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 693491896, + "step": 7152 + }, + { + "epoch": 0.6993547125537739, + "grad_norm": 7.599339889315194, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 693588860, + "step": 7153 + }, + { + "epoch": 0.6993547125537739, + "loss": 0.07601127028465271, + "loss_ce": 0.002914038486778736, + "loss_iou": 0.1904296875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 693588860, + "step": 7153 + }, + { + "epoch": 0.6994524833789597, + "grad_norm": 7.2310335458718145, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 693684952, + "step": 7154 + }, + { + "epoch": 0.6994524833789597, + "loss": 0.04916628077626228, + "loss_ce": 0.008715230971574783, + "loss_iou": 0.29296875, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 693684952, + "step": 7154 + }, + { + "epoch": 0.6995502542041455, + "grad_norm": 3.796085877848766, + "learning_rate": 5e-05, + "loss": 0.1036, + "num_input_tokens_seen": 693782948, + "step": 7155 + }, + { + "epoch": 0.6995502542041455, + "loss": 0.12242842465639114, + "loss_ce": 0.008201124146580696, + "loss_iou": 0.314453125, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 693782948, + "step": 7155 + }, + { + "epoch": 0.6996480250293312, + "grad_norm": 5.657927576919576, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 693880776, + "step": 7156 + }, + { + "epoch": 0.6996480250293312, + "loss": 0.07674837112426758, + "loss_ce": 0.00465059420093894, + "loss_iou": 0.279296875, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 693880776, + "step": 7156 + }, + { + "epoch": 0.699745795854517, + "grad_norm": 11.303620855911428, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 693978184, + "step": 7157 + }, + { + "epoch": 0.699745795854517, + "loss": 0.053204938769340515, + "loss_ce": 0.0059866150841116905, + "loss_iou": 0.30078125, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 693978184, + "step": 7157 + }, + { + "epoch": 0.6998435666797028, + "grad_norm": 13.567463095305373, + "learning_rate": 5e-05, + "loss": 0.067, + "num_input_tokens_seen": 694075524, + "step": 7158 + }, + { + "epoch": 0.6998435666797028, + "loss": 0.05140101537108421, + "loss_ce": 0.002328747883439064, + "loss_iou": 0.3515625, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 694075524, + "step": 7158 + }, + { + "epoch": 0.6999413375048885, + "grad_norm": 10.438876606026492, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 694173400, + "step": 7159 + }, + { + "epoch": 0.6999413375048885, + "loss": 0.05207686871290207, + "loss_ce": 0.004286341834813356, + "loss_iou": 0.318359375, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 694173400, + "step": 7159 + }, + { + "epoch": 0.7000391083300743, + "grad_norm": 14.784933898489605, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 694269636, + "step": 7160 + }, + { + "epoch": 0.7000391083300743, + "loss": 0.0807642936706543, + "loss_ce": 0.005187507253140211, + "loss_iou": 0.30859375, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 694269636, + "step": 7160 + }, + { + "epoch": 0.70013687915526, + "grad_norm": 5.261344936860538, + "learning_rate": 5e-05, + "loss": 0.1007, + "num_input_tokens_seen": 694366904, + "step": 7161 + }, + { + "epoch": 0.70013687915526, + "loss": 0.09891166538000107, + "loss_ce": 0.00415458669885993, + "loss_iou": 0.341796875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 694366904, + "step": 7161 + }, + { + "epoch": 0.7002346499804458, + "grad_norm": 5.008669366545751, + "learning_rate": 5e-05, + "loss": 0.0504, + "num_input_tokens_seen": 694463584, + "step": 7162 + }, + { + "epoch": 0.7002346499804458, + "loss": 0.05227820575237274, + "loss_ce": 0.005128550808876753, + "loss_iou": 0.236328125, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 694463584, + "step": 7162 + }, + { + "epoch": 0.7003324208056316, + "grad_norm": 24.72468191363158, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 694560624, + "step": 7163 + }, + { + "epoch": 0.7003324208056316, + "loss": 0.07136502861976624, + "loss_ce": 0.004790928214788437, + "loss_iou": 0.33203125, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 694560624, + "step": 7163 + }, + { + "epoch": 0.7004301916308173, + "grad_norm": 18.74770576178954, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 694657356, + "step": 7164 + }, + { + "epoch": 0.7004301916308173, + "loss": 0.06715933978557587, + "loss_ce": 0.002622290514409542, + "loss_iou": 0.29296875, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 694657356, + "step": 7164 + }, + { + "epoch": 0.7005279624560031, + "grad_norm": 11.055262504955866, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 694755172, + "step": 7165 + }, + { + "epoch": 0.7005279624560031, + "loss": 0.06672318279743195, + "loss_ce": 0.008068396709859371, + "loss_iou": 0.248046875, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 694755172, + "step": 7165 + }, + { + "epoch": 0.7006257332811889, + "grad_norm": 8.819040887648075, + "learning_rate": 5e-05, + "loss": 0.0908, + "num_input_tokens_seen": 694851988, + "step": 7166 + }, + { + "epoch": 0.7006257332811889, + "loss": 0.04780682176351547, + "loss_ce": 0.005723080597817898, + "loss_iou": 0.337890625, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 694851988, + "step": 7166 + }, + { + "epoch": 0.7007235041063746, + "grad_norm": 13.721805536376316, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 694948448, + "step": 7167 + }, + { + "epoch": 0.7007235041063746, + "loss": 0.033902887254953384, + "loss_ce": 0.0028359934221953154, + "loss_iou": 0.34375, + "loss_num": 0.0062255859375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 694948448, + "step": 7167 + }, + { + "epoch": 0.7008212749315604, + "grad_norm": 10.222413437064386, + "learning_rate": 5e-05, + "loss": 0.0726, + "num_input_tokens_seen": 695045460, + "step": 7168 + }, + { + "epoch": 0.7008212749315604, + "loss": 0.05762546509504318, + "loss_ce": 0.0012709435541182756, + "loss_iou": 0.318359375, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 695045460, + "step": 7168 + }, + { + "epoch": 0.7009190457567462, + "grad_norm": 5.451546671589632, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 695141644, + "step": 7169 + }, + { + "epoch": 0.7009190457567462, + "loss": 0.09214431047439575, + "loss_ce": 0.007976831868290901, + "loss_iou": 0.287109375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 695141644, + "step": 7169 + }, + { + "epoch": 0.7010168165819319, + "grad_norm": 5.929997141604927, + "learning_rate": 5e-05, + "loss": 0.0675, + "num_input_tokens_seen": 695237912, + "step": 7170 + }, + { + "epoch": 0.7010168165819319, + "loss": 0.054085928946733475, + "loss_ce": 0.0018875182140618563, + "loss_iou": 0.234375, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 695237912, + "step": 7170 + }, + { + "epoch": 0.7011145874071177, + "grad_norm": 5.972532971208995, + "learning_rate": 5e-05, + "loss": 0.0758, + "num_input_tokens_seen": 695334408, + "step": 7171 + }, + { + "epoch": 0.7011145874071177, + "loss": 0.06972520798444748, + "loss_ce": 0.00525300670415163, + "loss_iou": 0.2060546875, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 695334408, + "step": 7171 + }, + { + "epoch": 0.7012123582323034, + "grad_norm": 8.4192370998285, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 695430584, + "step": 7172 + }, + { + "epoch": 0.7012123582323034, + "loss": 0.11217284947633743, + "loss_ce": 0.005101926624774933, + "loss_iou": 0.263671875, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 695430584, + "step": 7172 + }, + { + "epoch": 0.7013101290574892, + "grad_norm": 17.676527177361848, + "learning_rate": 5e-05, + "loss": 0.0514, + "num_input_tokens_seen": 695526628, + "step": 7173 + }, + { + "epoch": 0.7013101290574892, + "loss": 0.056803688406944275, + "loss_ce": 0.005770668853074312, + "loss_iou": 0.25390625, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 695526628, + "step": 7173 + }, + { + "epoch": 0.701407899882675, + "grad_norm": 8.880773184990838, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 695623788, + "step": 7174 + }, + { + "epoch": 0.701407899882675, + "loss": 0.07109981030225754, + "loss_ce": 0.004205281846225262, + "loss_iou": 0.2314453125, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 695623788, + "step": 7174 + }, + { + "epoch": 0.7015056707078607, + "grad_norm": 23.375515631631142, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 695720560, + "step": 7175 + }, + { + "epoch": 0.7015056707078607, + "loss": 0.07460197806358337, + "loss_ce": 0.005342334974557161, + "loss_iou": 0.28515625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 695720560, + "step": 7175 + }, + { + "epoch": 0.7016034415330465, + "grad_norm": 17.13161994152382, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 695817680, + "step": 7176 + }, + { + "epoch": 0.7016034415330465, + "loss": 0.07869934290647507, + "loss_ce": 0.002969972090795636, + "loss_iou": 0.34765625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 695817680, + "step": 7176 + }, + { + "epoch": 0.7017012123582324, + "grad_norm": 7.156711643856752, + "learning_rate": 5e-05, + "loss": 0.1077, + "num_input_tokens_seen": 695915016, + "step": 7177 + }, + { + "epoch": 0.7017012123582324, + "loss": 0.13073225319385529, + "loss_ce": 0.007258133497089148, + "loss_iou": 0.248046875, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 695915016, + "step": 7177 + }, + { + "epoch": 0.701798983183418, + "grad_norm": 5.146950324655763, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 696011484, + "step": 7178 + }, + { + "epoch": 0.701798983183418, + "loss": 0.07608793675899506, + "loss_ce": 0.005470264237374067, + "loss_iou": 0.33984375, + "loss_num": 0.01409912109375, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 696011484, + "step": 7178 + }, + { + "epoch": 0.7018967540086039, + "grad_norm": 7.163823799909512, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 696107956, + "step": 7179 + }, + { + "epoch": 0.7018967540086039, + "loss": 0.05218854174017906, + "loss_ce": 0.0035587786696851254, + "loss_iou": 0.26953125, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 696107956, + "step": 7179 + }, + { + "epoch": 0.7019945248337895, + "grad_norm": 5.8210726397824875, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 696204280, + "step": 7180 + }, + { + "epoch": 0.7019945248337895, + "loss": 0.08202371001243591, + "loss_ce": 0.0036545763723552227, + "loss_iou": 0.240234375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 696204280, + "step": 7180 + }, + { + "epoch": 0.7020922956589754, + "grad_norm": 6.921402124298246, + "learning_rate": 5e-05, + "loss": 0.0605, + "num_input_tokens_seen": 696301476, + "step": 7181 + }, + { + "epoch": 0.7020922956589754, + "loss": 0.060040514916181564, + "loss_ce": 0.007107777986675501, + "loss_iou": 0.267578125, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 696301476, + "step": 7181 + }, + { + "epoch": 0.7021900664841612, + "grad_norm": 13.316045008253646, + "learning_rate": 5e-05, + "loss": 0.0845, + "num_input_tokens_seen": 696398080, + "step": 7182 + }, + { + "epoch": 0.7021900664841612, + "loss": 0.09090495109558105, + "loss_ce": 0.006310229655355215, + "loss_iou": 0.365234375, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 696398080, + "step": 7182 + }, + { + "epoch": 0.7022878373093469, + "grad_norm": 7.045094215686211, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 696494512, + "step": 7183 + }, + { + "epoch": 0.7022878373093469, + "loss": 0.06936483085155487, + "loss_ce": 0.0024092677049338818, + "loss_iou": 0.26953125, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 696494512, + "step": 7183 + }, + { + "epoch": 0.7023856081345327, + "grad_norm": 5.314395800345795, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 696590736, + "step": 7184 + }, + { + "epoch": 0.7023856081345327, + "loss": 0.09952971339225769, + "loss_ce": 0.009960614144802094, + "loss_iou": 0.26171875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 696590736, + "step": 7184 + }, + { + "epoch": 0.7024833789597185, + "grad_norm": 7.839868673577287, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 696686476, + "step": 7185 + }, + { + "epoch": 0.7024833789597185, + "loss": 0.0857616662979126, + "loss_ce": 0.006110784597694874, + "loss_iou": 0.2421875, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 696686476, + "step": 7185 + }, + { + "epoch": 0.7025811497849042, + "grad_norm": 9.600378515791972, + "learning_rate": 5e-05, + "loss": 0.0827, + "num_input_tokens_seen": 696783460, + "step": 7186 + }, + { + "epoch": 0.7025811497849042, + "loss": 0.10156035423278809, + "loss_ce": 0.0021951179951429367, + "loss_iou": 0.41796875, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 696783460, + "step": 7186 + }, + { + "epoch": 0.70267892061009, + "grad_norm": 18.335820063125087, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 696880732, + "step": 7187 + }, + { + "epoch": 0.70267892061009, + "loss": 0.07927598059177399, + "loss_ce": 0.005911720916628838, + "loss_iou": 0.361328125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 696880732, + "step": 7187 + }, + { + "epoch": 0.7027766914352758, + "grad_norm": 8.211309527842237, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 696977360, + "step": 7188 + }, + { + "epoch": 0.7027766914352758, + "loss": 0.0457809641957283, + "loss_ce": 0.002743549644947052, + "loss_iou": 0.318359375, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 696977360, + "step": 7188 + }, + { + "epoch": 0.7028744622604615, + "grad_norm": 4.253256185973423, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 697074728, + "step": 7189 + }, + { + "epoch": 0.7028744622604615, + "loss": 0.04994020611047745, + "loss_ce": 0.009382345713675022, + "loss_iou": 0.27734375, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 697074728, + "step": 7189 + }, + { + "epoch": 0.7029722330856473, + "grad_norm": 5.1512374021444955, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 697171428, + "step": 7190 + }, + { + "epoch": 0.7029722330856473, + "loss": 0.11657409369945526, + "loss_ce": 0.004086299799382687, + "loss_iou": 0.33203125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 697171428, + "step": 7190 + }, + { + "epoch": 0.703070003910833, + "grad_norm": 4.098771146476658, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 697268784, + "step": 7191 + }, + { + "epoch": 0.703070003910833, + "loss": 0.06521030515432358, + "loss_ce": 0.0038089370355010033, + "loss_iou": 0.333984375, + "loss_num": 0.01226806640625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 697268784, + "step": 7191 + }, + { + "epoch": 0.7031677747360188, + "grad_norm": 8.439000979009462, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 697365088, + "step": 7192 + }, + { + "epoch": 0.7031677747360188, + "loss": 0.09681795537471771, + "loss_ce": 0.011185629293322563, + "loss_iou": 0.267578125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 697365088, + "step": 7192 + }, + { + "epoch": 0.7032655455612046, + "grad_norm": 12.418218190838658, + "learning_rate": 5e-05, + "loss": 0.0916, + "num_input_tokens_seen": 697462088, + "step": 7193 + }, + { + "epoch": 0.7032655455612046, + "loss": 0.07221844792366028, + "loss_ce": 0.009954954497516155, + "loss_iou": 0.296875, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 697462088, + "step": 7193 + }, + { + "epoch": 0.7033633163863903, + "grad_norm": 16.55711232376295, + "learning_rate": 5e-05, + "loss": 0.0638, + "num_input_tokens_seen": 697559480, + "step": 7194 + }, + { + "epoch": 0.7033633163863903, + "loss": 0.059742894023656845, + "loss_ce": 0.004887544550001621, + "loss_iou": 0.314453125, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 697559480, + "step": 7194 + }, + { + "epoch": 0.7034610872115761, + "grad_norm": 17.210293091132517, + "learning_rate": 5e-05, + "loss": 0.0905, + "num_input_tokens_seen": 697656660, + "step": 7195 + }, + { + "epoch": 0.7034610872115761, + "loss": 0.11491864919662476, + "loss_ce": 0.0067948633804917336, + "loss_iou": 0.26171875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 697656660, + "step": 7195 + }, + { + "epoch": 0.7035588580367619, + "grad_norm": 23.083349976891174, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 697753340, + "step": 7196 + }, + { + "epoch": 0.7035588580367619, + "loss": 0.10394302010536194, + "loss_ce": 0.009880216792225838, + "loss_iou": 0.224609375, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 697753340, + "step": 7196 + }, + { + "epoch": 0.7036566288619476, + "grad_norm": 8.938579207904064, + "learning_rate": 5e-05, + "loss": 0.0675, + "num_input_tokens_seen": 697849832, + "step": 7197 + }, + { + "epoch": 0.7036566288619476, + "loss": 0.04659304395318031, + "loss_ce": 0.0043567162938416, + "loss_iou": 0.306640625, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 697849832, + "step": 7197 + }, + { + "epoch": 0.7037543996871334, + "grad_norm": 2.568308704709423, + "learning_rate": 5e-05, + "loss": 0.0508, + "num_input_tokens_seen": 697946420, + "step": 7198 + }, + { + "epoch": 0.7037543996871334, + "loss": 0.03857865557074547, + "loss_ce": 0.0023619215935468674, + "loss_iou": 0.3125, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 697946420, + "step": 7198 + }, + { + "epoch": 0.7038521705123191, + "grad_norm": 6.504360190321349, + "learning_rate": 5e-05, + "loss": 0.0561, + "num_input_tokens_seen": 698042900, + "step": 7199 + }, + { + "epoch": 0.7038521705123191, + "loss": 0.055720601230859756, + "loss_ce": 0.011531146243214607, + "loss_iou": 0.287109375, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 698042900, + "step": 7199 + }, + { + "epoch": 0.7039499413375049, + "grad_norm": 10.533877313164428, + "learning_rate": 5e-05, + "loss": 0.0552, + "num_input_tokens_seen": 698140212, + "step": 7200 + }, + { + "epoch": 0.7039499413375049, + "loss": 0.06250959634780884, + "loss_ce": 0.004442278295755386, + "loss_iou": 0.29296875, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 698140212, + "step": 7200 + }, + { + "epoch": 0.7040477121626907, + "grad_norm": 7.05075039441948, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 698237380, + "step": 7201 + }, + { + "epoch": 0.7040477121626907, + "loss": 0.05800589919090271, + "loss_ce": 0.004569624550640583, + "loss_iou": 0.349609375, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 698237380, + "step": 7201 + }, + { + "epoch": 0.7041454829878764, + "grad_norm": 3.7520520993276727, + "learning_rate": 5e-05, + "loss": 0.0525, + "num_input_tokens_seen": 698334472, + "step": 7202 + }, + { + "epoch": 0.7041454829878764, + "loss": 0.07204354554414749, + "loss_ce": 0.005423670634627342, + "loss_iou": 0.32421875, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 698334472, + "step": 7202 + }, + { + "epoch": 0.7042432538130622, + "grad_norm": 3.795146524252928, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 698431108, + "step": 7203 + }, + { + "epoch": 0.7042432538130622, + "loss": 0.07278402149677277, + "loss_ce": 0.0028987685218453407, + "loss_iou": 0.29296875, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 698431108, + "step": 7203 + }, + { + "epoch": 0.704341024638248, + "grad_norm": 9.091656537544457, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 698528388, + "step": 7204 + }, + { + "epoch": 0.704341024638248, + "loss": 0.08612386882305145, + "loss_ce": 0.0029634626116603613, + "loss_iou": 0.267578125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 698528388, + "step": 7204 + }, + { + "epoch": 0.7044387954634337, + "grad_norm": 9.38877270721789, + "learning_rate": 5e-05, + "loss": 0.1047, + "num_input_tokens_seen": 698624880, + "step": 7205 + }, + { + "epoch": 0.7044387954634337, + "loss": 0.08117194473743439, + "loss_ce": 0.005732485093176365, + "loss_iou": 0.27734375, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 698624880, + "step": 7205 + }, + { + "epoch": 0.7045365662886195, + "grad_norm": 5.584010836013017, + "learning_rate": 5e-05, + "loss": 0.0773, + "num_input_tokens_seen": 698722476, + "step": 7206 + }, + { + "epoch": 0.7045365662886195, + "loss": 0.06170380488038063, + "loss_ce": 0.003689887933433056, + "loss_iou": 0.37890625, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 698722476, + "step": 7206 + }, + { + "epoch": 0.7046343371138052, + "grad_norm": 10.636063067035264, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 698819108, + "step": 7207 + }, + { + "epoch": 0.7046343371138052, + "loss": 0.1090770959854126, + "loss_ce": 0.005027415230870247, + "loss_iou": 0.232421875, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 698819108, + "step": 7207 + }, + { + "epoch": 0.704732107938991, + "grad_norm": 11.197358624617285, + "learning_rate": 5e-05, + "loss": 0.1016, + "num_input_tokens_seen": 698916728, + "step": 7208 + }, + { + "epoch": 0.704732107938991, + "loss": 0.11306321620941162, + "loss_ce": 0.008952499367296696, + "loss_iou": 0.1787109375, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 698916728, + "step": 7208 + }, + { + "epoch": 0.7048298787641768, + "grad_norm": 4.130134521648459, + "learning_rate": 5e-05, + "loss": 0.0946, + "num_input_tokens_seen": 699013708, + "step": 7209 + }, + { + "epoch": 0.7048298787641768, + "loss": 0.0800553560256958, + "loss_ce": 0.003158688312396407, + "loss_iou": 0.234375, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 699013708, + "step": 7209 + }, + { + "epoch": 0.7049276495893625, + "grad_norm": 5.1549998652642195, + "learning_rate": 5e-05, + "loss": 0.0539, + "num_input_tokens_seen": 699109812, + "step": 7210 + }, + { + "epoch": 0.7049276495893625, + "loss": 0.05659890174865723, + "loss_ce": 0.003231282578781247, + "loss_iou": 0.26953125, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 699109812, + "step": 7210 + }, + { + "epoch": 0.7050254204145483, + "grad_norm": 3.597620762409306, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 699206796, + "step": 7211 + }, + { + "epoch": 0.7050254204145483, + "loss": 0.07718408107757568, + "loss_ce": 0.0035451685544103384, + "loss_iou": 0.224609375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 699206796, + "step": 7211 + }, + { + "epoch": 0.7051231912397341, + "grad_norm": 3.820429745361115, + "learning_rate": 5e-05, + "loss": 0.0991, + "num_input_tokens_seen": 699303700, + "step": 7212 + }, + { + "epoch": 0.7051231912397341, + "loss": 0.08169859647750854, + "loss_ce": 0.001925640506669879, + "loss_iou": 0.29296875, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 699303700, + "step": 7212 + }, + { + "epoch": 0.7052209620649198, + "grad_norm": 3.518292937563008, + "learning_rate": 5e-05, + "loss": 0.0481, + "num_input_tokens_seen": 699399988, + "step": 7213 + }, + { + "epoch": 0.7052209620649198, + "loss": 0.05318289250135422, + "loss_ce": 0.004400544334203005, + "loss_iou": 0.263671875, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 699399988, + "step": 7213 + }, + { + "epoch": 0.7053187328901056, + "grad_norm": 4.453819897543165, + "learning_rate": 5e-05, + "loss": 0.0596, + "num_input_tokens_seen": 699496884, + "step": 7214 + }, + { + "epoch": 0.7053187328901056, + "loss": 0.05308190733194351, + "loss_ce": 0.0028041996993124485, + "loss_iou": 0.30859375, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 699496884, + "step": 7214 + }, + { + "epoch": 0.7054165037152914, + "grad_norm": 2.3806275752462804, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 699594208, + "step": 7215 + }, + { + "epoch": 0.7054165037152914, + "loss": 0.07454070448875427, + "loss_ce": 0.006791682913899422, + "loss_iou": 0.326171875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 699594208, + "step": 7215 + }, + { + "epoch": 0.7055142745404771, + "grad_norm": 14.316590031879448, + "learning_rate": 5e-05, + "loss": 0.0604, + "num_input_tokens_seen": 699691100, + "step": 7216 + }, + { + "epoch": 0.7055142745404771, + "loss": 0.07191400229930878, + "loss_ce": 0.0052254656329751015, + "loss_iou": 0.181640625, + "loss_num": 0.0133056640625, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 699691100, + "step": 7216 + }, + { + "epoch": 0.7056120453656629, + "grad_norm": 24.39693307000394, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 699788484, + "step": 7217 + }, + { + "epoch": 0.7056120453656629, + "loss": 0.07010498642921448, + "loss_ce": 0.005468758754432201, + "loss_iou": 0.263671875, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 699788484, + "step": 7217 + }, + { + "epoch": 0.7057098161908486, + "grad_norm": 5.397053177564322, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 699884748, + "step": 7218 + }, + { + "epoch": 0.7057098161908486, + "loss": 0.07624535262584686, + "loss_ce": 0.008534472435712814, + "loss_iou": 0.2421875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 699884748, + "step": 7218 + }, + { + "epoch": 0.7058075870160344, + "grad_norm": 7.050832480399227, + "learning_rate": 5e-05, + "loss": 0.1218, + "num_input_tokens_seen": 699981140, + "step": 7219 + }, + { + "epoch": 0.7058075870160344, + "loss": 0.07996727526187897, + "loss_ce": 0.0037648878060281277, + "loss_iou": 0.2255859375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 699981140, + "step": 7219 + }, + { + "epoch": 0.7059053578412202, + "grad_norm": 7.689709981425746, + "learning_rate": 5e-05, + "loss": 0.0979, + "num_input_tokens_seen": 700078556, + "step": 7220 + }, + { + "epoch": 0.7059053578412202, + "loss": 0.05737677961587906, + "loss_ce": 0.005603709723800421, + "loss_iou": 0.271484375, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 700078556, + "step": 7220 + }, + { + "epoch": 0.7060031286664059, + "grad_norm": 23.760379355231105, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 700175836, + "step": 7221 + }, + { + "epoch": 0.7060031286664059, + "loss": 0.09325534105300903, + "loss_ce": 0.004296598955988884, + "loss_iou": 0.3515625, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 700175836, + "step": 7221 + }, + { + "epoch": 0.7061008994915917, + "grad_norm": 15.011311940629517, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 700272672, + "step": 7222 + }, + { + "epoch": 0.7061008994915917, + "loss": 0.08681629598140717, + "loss_ce": 0.00362537894397974, + "loss_iou": 0.373046875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 700272672, + "step": 7222 + }, + { + "epoch": 0.7061986703167775, + "grad_norm": 5.248760345022264, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 700370204, + "step": 7223 + }, + { + "epoch": 0.7061986703167775, + "loss": 0.09838041663169861, + "loss_ce": 0.00643096212297678, + "loss_iou": 0.27734375, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 700370204, + "step": 7223 + }, + { + "epoch": 0.7062964411419632, + "grad_norm": 17.50533211258496, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 700467260, + "step": 7224 + }, + { + "epoch": 0.7062964411419632, + "loss": 0.10090982913970947, + "loss_ce": 0.004115709103643894, + "loss_iou": 0.345703125, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 700467260, + "step": 7224 + }, + { + "epoch": 0.706394211967149, + "grad_norm": 9.679495819626556, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 700564156, + "step": 7225 + }, + { + "epoch": 0.706394211967149, + "loss": 0.06673315167427063, + "loss_ce": 0.009695799089968204, + "loss_iou": 0.1845703125, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 700564156, + "step": 7225 + }, + { + "epoch": 0.7064919827923347, + "grad_norm": 8.57102631374809, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 700661712, + "step": 7226 + }, + { + "epoch": 0.7064919827923347, + "loss": 0.059329912066459656, + "loss_ce": 0.006999894976615906, + "loss_iou": 0.23046875, + "loss_num": 0.010498046875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 700661712, + "step": 7226 + }, + { + "epoch": 0.7065897536175205, + "grad_norm": 10.663292713352805, + "learning_rate": 5e-05, + "loss": 0.1134, + "num_input_tokens_seen": 700758844, + "step": 7227 + }, + { + "epoch": 0.7065897536175205, + "loss": 0.09142235666513443, + "loss_ce": 0.006106652785092592, + "loss_iou": 0.251953125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 700758844, + "step": 7227 + }, + { + "epoch": 0.7066875244427063, + "grad_norm": 11.590950008298474, + "learning_rate": 5e-05, + "loss": 0.0582, + "num_input_tokens_seen": 700855848, + "step": 7228 + }, + { + "epoch": 0.7066875244427063, + "loss": 0.057320453226566315, + "loss_ce": 0.006554462481290102, + "loss_iou": 0.224609375, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 700855848, + "step": 7228 + }, + { + "epoch": 0.706785295267892, + "grad_norm": 5.000152162462362, + "learning_rate": 5e-05, + "loss": 0.0977, + "num_input_tokens_seen": 700953076, + "step": 7229 + }, + { + "epoch": 0.706785295267892, + "loss": 0.06469755619764328, + "loss_ce": 0.007778459694236517, + "loss_iou": 0.1982421875, + "loss_num": 0.01141357421875, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 700953076, + "step": 7229 + }, + { + "epoch": 0.7068830660930778, + "grad_norm": 8.347348712433735, + "learning_rate": 5e-05, + "loss": 0.0949, + "num_input_tokens_seen": 701049388, + "step": 7230 + }, + { + "epoch": 0.7068830660930778, + "loss": 0.0767727792263031, + "loss_ce": 0.0035153308417648077, + "loss_iou": 0.283203125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 701049388, + "step": 7230 + }, + { + "epoch": 0.7069808369182636, + "grad_norm": 5.052105261259517, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 701146348, + "step": 7231 + }, + { + "epoch": 0.7069808369182636, + "loss": 0.09753750264644623, + "loss_ce": 0.006274683866649866, + "loss_iou": 0.306640625, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 701146348, + "step": 7231 + }, + { + "epoch": 0.7070786077434493, + "grad_norm": 7.250961568384482, + "learning_rate": 5e-05, + "loss": 0.073, + "num_input_tokens_seen": 701243836, + "step": 7232 + }, + { + "epoch": 0.7070786077434493, + "loss": 0.08453808724880219, + "loss_ce": 0.010983092710375786, + "loss_iou": 0.2734375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 701243836, + "step": 7232 + }, + { + "epoch": 0.7071763785686351, + "grad_norm": 3.407098691878277, + "learning_rate": 5e-05, + "loss": 0.0863, + "num_input_tokens_seen": 701340188, + "step": 7233 + }, + { + "epoch": 0.7071763785686351, + "loss": 0.095459945499897, + "loss_ce": 0.008233077824115753, + "loss_iou": 0.212890625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 701340188, + "step": 7233 + }, + { + "epoch": 0.7072741493938209, + "grad_norm": 3.1301017605396315, + "learning_rate": 5e-05, + "loss": 0.0971, + "num_input_tokens_seen": 701436152, + "step": 7234 + }, + { + "epoch": 0.7072741493938209, + "loss": 0.12252552062273026, + "loss_ce": 0.00645190617069602, + "loss_iou": 0.173828125, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 701436152, + "step": 7234 + }, + { + "epoch": 0.7073719202190066, + "grad_norm": 7.258857136552495, + "learning_rate": 5e-05, + "loss": 0.107, + "num_input_tokens_seen": 701533240, + "step": 7235 + }, + { + "epoch": 0.7073719202190066, + "loss": 0.09062263369560242, + "loss_ce": 0.009613722562789917, + "loss_iou": 0.294921875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 701533240, + "step": 7235 + }, + { + "epoch": 0.7074696910441924, + "grad_norm": 8.593626599644436, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 701630604, + "step": 7236 + }, + { + "epoch": 0.7074696910441924, + "loss": 0.08879370242357254, + "loss_ce": 0.00682348757982254, + "loss_iou": 0.2890625, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 701630604, + "step": 7236 + }, + { + "epoch": 0.7075674618693781, + "grad_norm": 12.092229734378856, + "learning_rate": 5e-05, + "loss": 0.0463, + "num_input_tokens_seen": 701725548, + "step": 7237 + }, + { + "epoch": 0.7075674618693781, + "loss": 0.05271313339471817, + "loss_ce": 0.0017030042363330722, + "loss_iou": 0.1220703125, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 701725548, + "step": 7237 + }, + { + "epoch": 0.7076652326945639, + "grad_norm": 11.713326800493752, + "learning_rate": 5e-05, + "loss": 0.0935, + "num_input_tokens_seen": 701822016, + "step": 7238 + }, + { + "epoch": 0.7076652326945639, + "loss": 0.09504376351833344, + "loss_ce": 0.0048719486221671104, + "loss_iou": 0.2333984375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 701822016, + "step": 7238 + }, + { + "epoch": 0.7077630035197497, + "grad_norm": 30.0577929520465, + "learning_rate": 5e-05, + "loss": 0.0893, + "num_input_tokens_seen": 701919176, + "step": 7239 + }, + { + "epoch": 0.7077630035197497, + "loss": 0.06759968400001526, + "loss_ce": 0.007632641587406397, + "loss_iou": 0.3046875, + "loss_num": 0.011962890625, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 701919176, + "step": 7239 + }, + { + "epoch": 0.7078607743449354, + "grad_norm": 20.269454179000373, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 702017132, + "step": 7240 + }, + { + "epoch": 0.7078607743449354, + "loss": 0.09956644475460052, + "loss_ce": 0.006632792763411999, + "loss_iou": 0.31640625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 702017132, + "step": 7240 + }, + { + "epoch": 0.7079585451701212, + "grad_norm": 3.762230882740064, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 702114160, + "step": 7241 + }, + { + "epoch": 0.7079585451701212, + "loss": 0.08618688583374023, + "loss_ce": 0.006902220658957958, + "loss_iou": 0.302734375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 702114160, + "step": 7241 + }, + { + "epoch": 0.708056315995307, + "grad_norm": 3.2179938698052513, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 702211136, + "step": 7242 + }, + { + "epoch": 0.708056315995307, + "loss": 0.08978069573640823, + "loss_ce": 0.0020884377881884575, + "loss_iou": 0.328125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 702211136, + "step": 7242 + }, + { + "epoch": 0.7081540868204927, + "grad_norm": 28.009566408791418, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 702309336, + "step": 7243 + }, + { + "epoch": 0.7081540868204927, + "loss": 0.07459907233715057, + "loss_ce": 0.00457648653537035, + "loss_iou": 0.34765625, + "loss_num": 0.0140380859375, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 702309336, + "step": 7243 + }, + { + "epoch": 0.7082518576456786, + "grad_norm": 15.466427736652305, + "learning_rate": 5e-05, + "loss": 0.0819, + "num_input_tokens_seen": 702407032, + "step": 7244 + }, + { + "epoch": 0.7082518576456786, + "loss": 0.06076667457818985, + "loss_ce": 0.004126051440834999, + "loss_iou": 0.37890625, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 702407032, + "step": 7244 + }, + { + "epoch": 0.7083496284708642, + "grad_norm": 8.834439785861669, + "learning_rate": 5e-05, + "loss": 0.0986, + "num_input_tokens_seen": 702504128, + "step": 7245 + }, + { + "epoch": 0.7083496284708642, + "loss": 0.10154624283313751, + "loss_ce": 0.009077980183064938, + "loss_iou": 0.294921875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 702504128, + "step": 7245 + }, + { + "epoch": 0.70844739929605, + "grad_norm": 5.594879495377024, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 702601768, + "step": 7246 + }, + { + "epoch": 0.70844739929605, + "loss": 0.09485732018947601, + "loss_ce": 0.007630448788404465, + "loss_iou": 0.30859375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 702601768, + "step": 7246 + }, + { + "epoch": 0.7085451701212359, + "grad_norm": 7.775851142781951, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 702699864, + "step": 7247 + }, + { + "epoch": 0.7085451701212359, + "loss": 0.07314172387123108, + "loss_ce": 0.003912597894668579, + "loss_iou": 0.294921875, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 702699864, + "step": 7247 + }, + { + "epoch": 0.7086429409464216, + "grad_norm": 4.823772772358217, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 702795708, + "step": 7248 + }, + { + "epoch": 0.7086429409464216, + "loss": 0.05718778818845749, + "loss_ce": 0.0037562758661806583, + "loss_iou": 0.1826171875, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 702795708, + "step": 7248 + }, + { + "epoch": 0.7087407117716074, + "grad_norm": 13.493275998197655, + "learning_rate": 5e-05, + "loss": 0.0935, + "num_input_tokens_seen": 702891988, + "step": 7249 + }, + { + "epoch": 0.7087407117716074, + "loss": 0.06372129172086716, + "loss_ce": 0.007485026493668556, + "loss_iou": 0.1962890625, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 702891988, + "step": 7249 + }, + { + "epoch": 0.7088384825967932, + "grad_norm": 3.6563238179158923, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 702989356, + "step": 7250 + }, + { + "epoch": 0.7088384825967932, + "eval_seeclick_CIoU": 0.43444594740867615, + "eval_seeclick_GIoU": 0.4396408945322037, + "eval_seeclick_IoU": 0.5129030495882034, + "eval_seeclick_MAE_all": 0.1164388507604599, + "eval_seeclick_MAE_h": 0.04804465174674988, + "eval_seeclick_MAE_w": 0.17986424267292023, + "eval_seeclick_MAE_x": 0.19162621349096298, + "eval_seeclick_MAE_y": 0.04622025415301323, + "eval_seeclick_NUM_probability": 0.9999969601631165, + "eval_seeclick_inside_bbox": 0.7386363744735718, + "eval_seeclick_loss": 0.3463895320892334, + "eval_seeclick_loss_ce": 0.009654406923800707, + "eval_seeclick_loss_iou": 0.3660888671875, + "eval_seeclick_loss_num": 0.06288909912109375, + "eval_seeclick_loss_xval": 0.31414794921875, + "eval_seeclick_runtime": 76.8642, + "eval_seeclick_samples_per_second": 0.559, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 702989356, + "step": 7250 + }, + { + "epoch": 0.7088384825967932, + "eval_icons_CIoU": 0.7112241387367249, + "eval_icons_GIoU": 0.7061667144298553, + "eval_icons_IoU": 0.7359445691108704, + "eval_icons_MAE_all": 0.055390745401382446, + "eval_icons_MAE_h": 0.06164373271167278, + "eval_icons_MAE_w": 0.05030917935073376, + "eval_icons_MAE_x": 0.04983130656182766, + "eval_icons_MAE_y": 0.05977877415716648, + "eval_icons_NUM_probability": 0.9999980926513672, + "eval_icons_inside_bbox": 0.8472222089767456, + "eval_icons_loss": 0.1681692749261856, + "eval_icons_loss_ce": 5.866079618499498e-06, + "eval_icons_loss_iou": 0.32952880859375, + "eval_icons_loss_num": 0.038974761962890625, + "eval_icons_loss_xval": 0.194915771484375, + "eval_icons_runtime": 86.0042, + "eval_icons_samples_per_second": 0.581, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 702989356, + "step": 7250 + }, + { + "epoch": 0.7088384825967932, + "eval_screenspot_CIoU": 0.306338091691335, + "eval_screenspot_GIoU": 0.2914280394713084, + "eval_screenspot_IoU": 0.3970351815223694, + "eval_screenspot_MAE_all": 0.16299018760522208, + "eval_screenspot_MAE_h": 0.11059591919183731, + "eval_screenspot_MAE_w": 0.22638187805811563, + "eval_screenspot_MAE_x": 0.21012373268604279, + "eval_screenspot_MAE_y": 0.1048592080672582, + "eval_screenspot_NUM_probability": 0.9999959866205851, + "eval_screenspot_inside_bbox": 0.6270833412806193, + "eval_screenspot_loss": 0.5762022733688354, + "eval_screenspot_loss_ce": 0.02036152935276429, + "eval_screenspot_loss_iou": 0.3380533854166667, + "eval_screenspot_loss_num": 0.11191813151041667, + "eval_screenspot_loss_xval": 0.5598551432291666, + "eval_screenspot_runtime": 163.6836, + "eval_screenspot_samples_per_second": 0.544, + "eval_screenspot_steps_per_second": 0.018, + "num_input_tokens_seen": 702989356, + "step": 7250 + }, + { + "epoch": 0.7088384825967932, + "eval_compot_CIoU": 0.5172017812728882, + "eval_compot_GIoU": 0.5150037556886673, + "eval_compot_IoU": 0.5770991444587708, + "eval_compot_MAE_all": 0.08680082857608795, + "eval_compot_MAE_h": 0.062375519424676895, + "eval_compot_MAE_w": 0.11145451292395592, + "eval_compot_MAE_x": 0.10811929032206535, + "eval_compot_MAE_y": 0.06525400280952454, + "eval_compot_NUM_probability": 0.9999922811985016, + "eval_compot_inside_bbox": 0.7795138955116272, + "eval_compot_loss": 0.2836616039276123, + "eval_compot_loss_ce": 0.01642550155520439, + "eval_compot_loss_iou": 0.4405517578125, + "eval_compot_loss_num": 0.0489501953125, + "eval_compot_loss_xval": 0.24462890625, + "eval_compot_runtime": 91.4435, + "eval_compot_samples_per_second": 0.547, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 702989356, + "step": 7250 + }, + { + "epoch": 0.7088384825967932, + "loss": 0.22615769505500793, + "loss_ce": 0.016501937061548233, + "loss_iou": 0.451171875, + "loss_num": 0.0419921875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 702989356, + "step": 7250 + }, + { + "epoch": 0.7089362534219789, + "grad_norm": 6.939457158998112, + "learning_rate": 5e-05, + "loss": 0.042, + "num_input_tokens_seen": 703087116, + "step": 7251 + }, + { + "epoch": 0.7089362534219789, + "loss": 0.03479661047458649, + "loss_ce": 0.005293742753565311, + "loss_iou": 0.294921875, + "loss_num": 0.005889892578125, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 703087116, + "step": 7251 + }, + { + "epoch": 0.7090340242471647, + "grad_norm": 16.060255037147897, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 703184320, + "step": 7252 + }, + { + "epoch": 0.7090340242471647, + "loss": 0.0730922520160675, + "loss_ce": 0.006401803810149431, + "loss_iou": 0.267578125, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 703184320, + "step": 7252 + }, + { + "epoch": 0.7091317950723504, + "grad_norm": 14.502134569493377, + "learning_rate": 5e-05, + "loss": 0.0932, + "num_input_tokens_seen": 703281368, + "step": 7253 + }, + { + "epoch": 0.7091317950723504, + "loss": 0.1015312448143959, + "loss_ce": 0.009734373539686203, + "loss_iou": 0.359375, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 703281368, + "step": 7253 + }, + { + "epoch": 0.7092295658975362, + "grad_norm": 17.21925902162528, + "learning_rate": 5e-05, + "loss": 0.1085, + "num_input_tokens_seen": 703378668, + "step": 7254 + }, + { + "epoch": 0.7092295658975362, + "loss": 0.07886999845504761, + "loss_ce": 0.003796755336225033, + "loss_iou": 0.3125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 703378668, + "step": 7254 + }, + { + "epoch": 0.709327336722722, + "grad_norm": 30.08132036168657, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 703475192, + "step": 7255 + }, + { + "epoch": 0.709327336722722, + "loss": 0.090298131108284, + "loss_ce": 0.002529578283429146, + "loss_iou": 0.27734375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 703475192, + "step": 7255 + }, + { + "epoch": 0.7094251075479077, + "grad_norm": 6.485361686746425, + "learning_rate": 5e-05, + "loss": 0.0452, + "num_input_tokens_seen": 703571868, + "step": 7256 + }, + { + "epoch": 0.7094251075479077, + "loss": 0.027431420981884003, + "loss_ce": 0.002193383639678359, + "loss_iou": 0.26953125, + "loss_num": 0.005035400390625, + "loss_xval": 0.0252685546875, + "num_input_tokens_seen": 703571868, + "step": 7256 + }, + { + "epoch": 0.7095228783730935, + "grad_norm": 5.075628736398672, + "learning_rate": 5e-05, + "loss": 0.0364, + "num_input_tokens_seen": 703668404, + "step": 7257 + }, + { + "epoch": 0.7095228783730935, + "loss": 0.031487174332141876, + "loss_ce": 0.003060051938518882, + "loss_iou": 0.2734375, + "loss_num": 0.005706787109375, + "loss_xval": 0.0284423828125, + "num_input_tokens_seen": 703668404, + "step": 7257 + }, + { + "epoch": 0.7096206491982793, + "grad_norm": 15.05868427917742, + "learning_rate": 5e-05, + "loss": 0.0769, + "num_input_tokens_seen": 703765696, + "step": 7258 + }, + { + "epoch": 0.7096206491982793, + "loss": 0.08570140600204468, + "loss_ce": 0.007271226495504379, + "loss_iou": 0.3046875, + "loss_num": 0.015625, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 703765696, + "step": 7258 + }, + { + "epoch": 0.709718420023465, + "grad_norm": 11.510278985582872, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 703862804, + "step": 7259 + }, + { + "epoch": 0.709718420023465, + "loss": 0.07783398777246475, + "loss_ce": 0.004301885142922401, + "loss_iou": 0.2734375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 703862804, + "step": 7259 + }, + { + "epoch": 0.7098161908486508, + "grad_norm": 6.078740988544295, + "learning_rate": 5e-05, + "loss": 0.0856, + "num_input_tokens_seen": 703959812, + "step": 7260 + }, + { + "epoch": 0.7098161908486508, + "loss": 0.08196191489696503, + "loss_ce": 0.00458459323272109, + "loss_iou": 0.361328125, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 703959812, + "step": 7260 + }, + { + "epoch": 0.7099139616738366, + "grad_norm": 4.373576979866763, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 704055808, + "step": 7261 + }, + { + "epoch": 0.7099139616738366, + "loss": 0.0685291737318039, + "loss_ce": 0.0026493454352021217, + "loss_iou": 0.1748046875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 704055808, + "step": 7261 + }, + { + "epoch": 0.7100117324990223, + "grad_norm": 14.883883919400455, + "learning_rate": 5e-05, + "loss": 0.0689, + "num_input_tokens_seen": 704153340, + "step": 7262 + }, + { + "epoch": 0.7100117324990223, + "loss": 0.0376087948679924, + "loss_ce": 0.0033680724445730448, + "loss_iou": 0.32421875, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 704153340, + "step": 7262 + }, + { + "epoch": 0.7101095033242081, + "grad_norm": 14.736828527564333, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 704249536, + "step": 7263 + }, + { + "epoch": 0.7101095033242081, + "loss": 0.07850194722414017, + "loss_ce": 0.0023453342728316784, + "loss_iou": 0.23046875, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 704249536, + "step": 7263 + }, + { + "epoch": 0.7102072741493938, + "grad_norm": 63.20314409448574, + "learning_rate": 5e-05, + "loss": 0.0865, + "num_input_tokens_seen": 704346192, + "step": 7264 + }, + { + "epoch": 0.7102072741493938, + "loss": 0.07151175290346146, + "loss_ce": 0.0038008757401257753, + "loss_iou": 0.294921875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 704346192, + "step": 7264 + }, + { + "epoch": 0.7103050449745796, + "grad_norm": 36.55659871741906, + "learning_rate": 5e-05, + "loss": 0.062, + "num_input_tokens_seen": 704442764, + "step": 7265 + }, + { + "epoch": 0.7103050449745796, + "loss": 0.07525569200515747, + "loss_ce": 0.0031121419742703438, + "loss_iou": 0.2412109375, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 704442764, + "step": 7265 + }, + { + "epoch": 0.7104028157997654, + "grad_norm": 15.163230737466241, + "learning_rate": 5e-05, + "loss": 0.1032, + "num_input_tokens_seen": 704541068, + "step": 7266 + }, + { + "epoch": 0.7104028157997654, + "loss": 0.0864730179309845, + "loss_ce": 0.005822694394737482, + "loss_iou": 0.234375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 704541068, + "step": 7266 + }, + { + "epoch": 0.7105005866249511, + "grad_norm": 10.482493881393118, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 704638484, + "step": 7267 + }, + { + "epoch": 0.7105005866249511, + "loss": 0.08761797100305557, + "loss_ce": 0.0069447532296180725, + "loss_iou": 0.28125, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 704638484, + "step": 7267 + }, + { + "epoch": 0.7105983574501369, + "grad_norm": 12.544619121062716, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 704735628, + "step": 7268 + }, + { + "epoch": 0.7105983574501369, + "loss": 0.10014462471008301, + "loss_ce": 0.0036709345877170563, + "loss_iou": 0.28125, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 704735628, + "step": 7268 + }, + { + "epoch": 0.7106961282753227, + "grad_norm": 23.40947228494542, + "learning_rate": 5e-05, + "loss": 0.0638, + "num_input_tokens_seen": 704833560, + "step": 7269 + }, + { + "epoch": 0.7106961282753227, + "loss": 0.07569067925214767, + "loss_ce": 0.0078043257817626, + "loss_iou": 0.341796875, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 704833560, + "step": 7269 + }, + { + "epoch": 0.7107938991005084, + "grad_norm": 19.5895637425106, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 704929688, + "step": 7270 + }, + { + "epoch": 0.7107938991005084, + "loss": 0.06555799394845963, + "loss_ce": 0.007025282829999924, + "loss_iou": 0.3046875, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 704929688, + "step": 7270 + }, + { + "epoch": 0.7108916699256942, + "grad_norm": 15.99895920337238, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 705027364, + "step": 7271 + }, + { + "epoch": 0.7108916699256942, + "loss": 0.1031690239906311, + "loss_ce": 0.0045209540985524654, + "loss_iou": 0.3359375, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 705027364, + "step": 7271 + }, + { + "epoch": 0.7109894407508799, + "grad_norm": 6.053703961733935, + "learning_rate": 5e-05, + "loss": 0.0433, + "num_input_tokens_seen": 705123612, + "step": 7272 + }, + { + "epoch": 0.7109894407508799, + "loss": 0.04891543835401535, + "loss_ce": 0.002574498299509287, + "loss_iou": 0.328125, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 705123612, + "step": 7272 + }, + { + "epoch": 0.7110872115760657, + "grad_norm": 13.12545556714386, + "learning_rate": 5e-05, + "loss": 0.1248, + "num_input_tokens_seen": 705220548, + "step": 7273 + }, + { + "epoch": 0.7110872115760657, + "loss": 0.11646512895822525, + "loss_ce": 0.0100808534771204, + "loss_iou": 0.291015625, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 705220548, + "step": 7273 + }, + { + "epoch": 0.7111849824012515, + "grad_norm": 6.902110234074392, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 705317636, + "step": 7274 + }, + { + "epoch": 0.7111849824012515, + "loss": 0.07291937619447708, + "loss_ce": 0.0038123182021081448, + "loss_iou": 0.25390625, + "loss_num": 0.0137939453125, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 705317636, + "step": 7274 + }, + { + "epoch": 0.7112827532264372, + "grad_norm": 8.491888846858583, + "learning_rate": 5e-05, + "loss": 0.067, + "num_input_tokens_seen": 705415684, + "step": 7275 + }, + { + "epoch": 0.7112827532264372, + "loss": 0.08117535710334778, + "loss_ce": 0.005507020279765129, + "loss_iou": 0.2373046875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 705415684, + "step": 7275 + }, + { + "epoch": 0.711380524051623, + "grad_norm": 2.2730748447383067, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 705513212, + "step": 7276 + }, + { + "epoch": 0.711380524051623, + "loss": 0.08047740161418915, + "loss_ce": 0.0011774778831750154, + "loss_iou": 0.40625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 705513212, + "step": 7276 + }, + { + "epoch": 0.7114782948768088, + "grad_norm": 3.072406984731764, + "learning_rate": 5e-05, + "loss": 0.0994, + "num_input_tokens_seen": 705610448, + "step": 7277 + }, + { + "epoch": 0.7114782948768088, + "loss": 0.06063521280884743, + "loss_ce": 0.0028349196072667837, + "loss_iou": 0.232421875, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 705610448, + "step": 7277 + }, + { + "epoch": 0.7115760657019945, + "grad_norm": 6.066329402293099, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 705707912, + "step": 7278 + }, + { + "epoch": 0.7115760657019945, + "loss": 0.055844251066446304, + "loss_ce": 0.0016679192194715142, + "loss_iou": 0.302734375, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 705707912, + "step": 7278 + }, + { + "epoch": 0.7116738365271803, + "grad_norm": 4.546044677196278, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 705805524, + "step": 7279 + }, + { + "epoch": 0.7116738365271803, + "loss": 0.04076225310564041, + "loss_ce": 0.007032697554677725, + "loss_iou": 0.263671875, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 705805524, + "step": 7279 + }, + { + "epoch": 0.7117716073523661, + "grad_norm": 6.539637239713543, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 705902332, + "step": 7280 + }, + { + "epoch": 0.7117716073523661, + "loss": 0.060480035841464996, + "loss_ce": 0.004266658797860146, + "loss_iou": 0.349609375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 705902332, + "step": 7280 + }, + { + "epoch": 0.7118693781775518, + "grad_norm": 5.7544243187499475, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 705999160, + "step": 7281 + }, + { + "epoch": 0.7118693781775518, + "loss": 0.06717000901699066, + "loss_ce": 0.004235128406435251, + "loss_iou": 0.27734375, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 705999160, + "step": 7281 + }, + { + "epoch": 0.7119671490027376, + "grad_norm": 4.952353515025824, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 706095644, + "step": 7282 + }, + { + "epoch": 0.7119671490027376, + "loss": 0.060245636850595474, + "loss_ce": 0.003208284266293049, + "loss_iou": 0.2412109375, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 706095644, + "step": 7282 + }, + { + "epoch": 0.7120649198279233, + "grad_norm": 8.164161624221501, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 706192748, + "step": 7283 + }, + { + "epoch": 0.7120649198279233, + "loss": 0.0808270275592804, + "loss_ce": 0.00348785356618464, + "loss_iou": 0.279296875, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 706192748, + "step": 7283 + }, + { + "epoch": 0.7121626906531091, + "grad_norm": 7.24082363004528, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 706289416, + "step": 7284 + }, + { + "epoch": 0.7121626906531091, + "loss": 0.06581203639507294, + "loss_ce": 0.006089136935770512, + "loss_iou": 0.41015625, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 706289416, + "step": 7284 + }, + { + "epoch": 0.7122604614782949, + "grad_norm": 4.6586115375592945, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 706386332, + "step": 7285 + }, + { + "epoch": 0.7122604614782949, + "loss": 0.06155247241258621, + "loss_ce": 0.006407210137695074, + "loss_iou": 0.296875, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 706386332, + "step": 7285 + }, + { + "epoch": 0.7123582323034806, + "grad_norm": 10.254326025026938, + "learning_rate": 5e-05, + "loss": 0.062, + "num_input_tokens_seen": 706482792, + "step": 7286 + }, + { + "epoch": 0.7123582323034806, + "loss": 0.06849835813045502, + "loss_ce": 0.004182556178420782, + "loss_iou": 0.341796875, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 706482792, + "step": 7286 + }, + { + "epoch": 0.7124560031286664, + "grad_norm": 14.4565055995778, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 706580008, + "step": 7287 + }, + { + "epoch": 0.7124560031286664, + "loss": 0.08595547080039978, + "loss_ce": 0.00415310263633728, + "loss_iou": 0.28125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 706580008, + "step": 7287 + }, + { + "epoch": 0.7125537739538522, + "grad_norm": 2.8370139009739668, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 706677348, + "step": 7288 + }, + { + "epoch": 0.7125537739538522, + "loss": 0.06247325986623764, + "loss_ce": 0.0032539009116590023, + "loss_iou": 0.267578125, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 706677348, + "step": 7288 + }, + { + "epoch": 0.7126515447790379, + "grad_norm": 8.563947081119466, + "learning_rate": 5e-05, + "loss": 0.0632, + "num_input_tokens_seen": 706774088, + "step": 7289 + }, + { + "epoch": 0.7126515447790379, + "loss": 0.05776926502585411, + "loss_ce": 0.003962959628552198, + "loss_iou": 0.189453125, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 706774088, + "step": 7289 + }, + { + "epoch": 0.7127493156042237, + "grad_norm": 2.5321964496374516, + "learning_rate": 5e-05, + "loss": 0.0481, + "num_input_tokens_seen": 706871156, + "step": 7290 + }, + { + "epoch": 0.7127493156042237, + "loss": 0.04782598093152046, + "loss_ce": 0.00465123588219285, + "loss_iou": 0.1982421875, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 706871156, + "step": 7290 + }, + { + "epoch": 0.7128470864294094, + "grad_norm": 8.25247025413462, + "learning_rate": 5e-05, + "loss": 0.0575, + "num_input_tokens_seen": 706968096, + "step": 7291 + }, + { + "epoch": 0.7128470864294094, + "loss": 0.08016161620616913, + "loss_ce": 0.003715085331350565, + "loss_iou": 0.2158203125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 706968096, + "step": 7291 + }, + { + "epoch": 0.7129448572545952, + "grad_norm": 33.39673825015113, + "learning_rate": 5e-05, + "loss": 0.0927, + "num_input_tokens_seen": 707064632, + "step": 7292 + }, + { + "epoch": 0.7129448572545952, + "loss": 0.09221678227186203, + "loss_ce": 0.01288825273513794, + "loss_iou": 0.24609375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 707064632, + "step": 7292 + }, + { + "epoch": 0.713042628079781, + "grad_norm": 86.66668498047768, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 707161928, + "step": 7293 + }, + { + "epoch": 0.713042628079781, + "loss": 0.09352074563503265, + "loss_ce": 0.003348937723785639, + "loss_iou": 0.3828125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 707161928, + "step": 7293 + }, + { + "epoch": 0.7131403989049667, + "grad_norm": 20.495933200439865, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 707258856, + "step": 7294 + }, + { + "epoch": 0.7131403989049667, + "loss": 0.07724674046039581, + "loss_ce": 0.006323887966573238, + "loss_iou": 0.326171875, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 707258856, + "step": 7294 + }, + { + "epoch": 0.7132381697301525, + "grad_norm": 7.7735742345588354, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 707355480, + "step": 7295 + }, + { + "epoch": 0.7132381697301525, + "loss": 0.1008090078830719, + "loss_ce": 0.006135852076113224, + "loss_iou": 0.1689453125, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 707355480, + "step": 7295 + }, + { + "epoch": 0.7133359405553383, + "grad_norm": 10.898325217716403, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 707453000, + "step": 7296 + }, + { + "epoch": 0.7133359405553383, + "loss": 0.06510880589485168, + "loss_ce": 0.003753217402845621, + "loss_iou": 0.27734375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 707453000, + "step": 7296 + }, + { + "epoch": 0.713433711380524, + "grad_norm": 2.175407144926729, + "learning_rate": 5e-05, + "loss": 0.0861, + "num_input_tokens_seen": 707549528, + "step": 7297 + }, + { + "epoch": 0.713433711380524, + "loss": 0.08735717833042145, + "loss_ce": 0.008484497666358948, + "loss_iou": 0.1796875, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 707549528, + "step": 7297 + }, + { + "epoch": 0.7135314822057098, + "grad_norm": 17.41061620750901, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 707646776, + "step": 7298 + }, + { + "epoch": 0.7135314822057098, + "loss": 0.10991702973842621, + "loss_ce": 0.009270058013498783, + "loss_iou": 0.2265625, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 707646776, + "step": 7298 + }, + { + "epoch": 0.7136292530308955, + "grad_norm": 6.2925092142045775, + "learning_rate": 5e-05, + "loss": 0.1192, + "num_input_tokens_seen": 707743228, + "step": 7299 + }, + { + "epoch": 0.7136292530308955, + "loss": 0.09102015197277069, + "loss_ce": 0.006906076800078154, + "loss_iou": 0.244140625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 707743228, + "step": 7299 + }, + { + "epoch": 0.7137270238560813, + "grad_norm": 6.08690982888457, + "learning_rate": 5e-05, + "loss": 0.1038, + "num_input_tokens_seen": 707840148, + "step": 7300 + }, + { + "epoch": 0.7137270238560813, + "loss": 0.09359100461006165, + "loss_ce": 0.007142331916838884, + "loss_iou": 0.26171875, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 707840148, + "step": 7300 + }, + { + "epoch": 0.7138247946812671, + "grad_norm": 44.35016888595055, + "learning_rate": 5e-05, + "loss": 0.0851, + "num_input_tokens_seen": 707937576, + "step": 7301 + }, + { + "epoch": 0.7138247946812671, + "loss": 0.09764529764652252, + "loss_ce": 0.0048718638718128204, + "loss_iou": 0.3046875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 707937576, + "step": 7301 + }, + { + "epoch": 0.7139225655064528, + "grad_norm": 4.031760788743108, + "learning_rate": 5e-05, + "loss": 0.1051, + "num_input_tokens_seen": 708034352, + "step": 7302 + }, + { + "epoch": 0.7139225655064528, + "loss": 0.08293987810611725, + "loss_ce": 0.0031440353486686945, + "loss_iou": 0.294921875, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 708034352, + "step": 7302 + }, + { + "epoch": 0.7140203363316386, + "grad_norm": 3.9737697206018625, + "learning_rate": 5e-05, + "loss": 0.0499, + "num_input_tokens_seen": 708130672, + "step": 7303 + }, + { + "epoch": 0.7140203363316386, + "loss": 0.04378571733832359, + "loss_ce": 0.011109022423624992, + "loss_iou": 0.2470703125, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 708130672, + "step": 7303 + }, + { + "epoch": 0.7141181071568244, + "grad_norm": 8.201828337362159, + "learning_rate": 5e-05, + "loss": 0.0539, + "num_input_tokens_seen": 708227856, + "step": 7304 + }, + { + "epoch": 0.7141181071568244, + "loss": 0.060321591794490814, + "loss_ce": 0.004657532088458538, + "loss_iou": 0.265625, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 708227856, + "step": 7304 + }, + { + "epoch": 0.7142158779820101, + "grad_norm": 16.521058300969255, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 708324428, + "step": 7305 + }, + { + "epoch": 0.7142158779820101, + "loss": 0.07627663761377335, + "loss_ce": 0.007703641429543495, + "loss_iou": 0.353515625, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 708324428, + "step": 7305 + }, + { + "epoch": 0.7143136488071959, + "grad_norm": 14.493386113478843, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 708420492, + "step": 7306 + }, + { + "epoch": 0.7143136488071959, + "loss": 0.094608373939991, + "loss_ce": 0.007022923789918423, + "loss_iou": 0.35546875, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 708420492, + "step": 7306 + }, + { + "epoch": 0.7144114196323818, + "grad_norm": 16.716390145105585, + "learning_rate": 5e-05, + "loss": 0.0605, + "num_input_tokens_seen": 708517628, + "step": 7307 + }, + { + "epoch": 0.7144114196323818, + "loss": 0.08009332418441772, + "loss_ce": 0.008445684798061848, + "loss_iou": 0.294921875, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 708517628, + "step": 7307 + }, + { + "epoch": 0.7145091904575674, + "grad_norm": 4.484610428538033, + "learning_rate": 5e-05, + "loss": 0.1059, + "num_input_tokens_seen": 708614888, + "step": 7308 + }, + { + "epoch": 0.7145091904575674, + "loss": 0.0699758231639862, + "loss_ce": 0.0031537676695734262, + "loss_iou": 0.265625, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 708614888, + "step": 7308 + }, + { + "epoch": 0.7146069612827533, + "grad_norm": 8.531070711628331, + "learning_rate": 5e-05, + "loss": 0.0996, + "num_input_tokens_seen": 708712956, + "step": 7309 + }, + { + "epoch": 0.7146069612827533, + "loss": 0.08912909030914307, + "loss_ce": 0.004625917412340641, + "loss_iou": 0.2490234375, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 708712956, + "step": 7309 + }, + { + "epoch": 0.714704732107939, + "grad_norm": 3.2136574406332548, + "learning_rate": 5e-05, + "loss": 0.0559, + "num_input_tokens_seen": 708809984, + "step": 7310 + }, + { + "epoch": 0.714704732107939, + "loss": 0.042283445596694946, + "loss_ce": 0.0033430159091949463, + "loss_iou": 0.26171875, + "loss_num": 0.007781982421875, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 708809984, + "step": 7310 + }, + { + "epoch": 0.7148025029331248, + "grad_norm": 5.323945628603344, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 708907016, + "step": 7311 + }, + { + "epoch": 0.7148025029331248, + "loss": 0.09042131900787354, + "loss_ce": 0.004575375933200121, + "loss_iou": 0.2734375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 708907016, + "step": 7311 + }, + { + "epoch": 0.7149002737583106, + "grad_norm": 4.843926867735812, + "learning_rate": 5e-05, + "loss": 0.1035, + "num_input_tokens_seen": 709003420, + "step": 7312 + }, + { + "epoch": 0.7149002737583106, + "loss": 0.1145036369562149, + "loss_ce": 0.0024125773925334215, + "loss_iou": 0.27734375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 709003420, + "step": 7312 + }, + { + "epoch": 0.7149980445834963, + "grad_norm": 11.281301776009418, + "learning_rate": 5e-05, + "loss": 0.1412, + "num_input_tokens_seen": 709100000, + "step": 7313 + }, + { + "epoch": 0.7149980445834963, + "loss": 0.1758042573928833, + "loss_ce": 0.006401184480637312, + "loss_iou": 0.1962890625, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 709100000, + "step": 7313 + }, + { + "epoch": 0.7150958154086821, + "grad_norm": 20.130664650766963, + "learning_rate": 5e-05, + "loss": 0.0972, + "num_input_tokens_seen": 709197008, + "step": 7314 + }, + { + "epoch": 0.7150958154086821, + "loss": 0.08745348453521729, + "loss_ce": 0.0062614609487354755, + "loss_iou": 0.28125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 709197008, + "step": 7314 + }, + { + "epoch": 0.7151935862338679, + "grad_norm": 6.529769078211002, + "learning_rate": 5e-05, + "loss": 0.0754, + "num_input_tokens_seen": 709294344, + "step": 7315 + }, + { + "epoch": 0.7151935862338679, + "loss": 0.07743167877197266, + "loss_ce": 0.0016489068511873484, + "loss_iou": 0.33203125, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 709294344, + "step": 7315 + }, + { + "epoch": 0.7152913570590536, + "grad_norm": 4.688290198558694, + "learning_rate": 5e-05, + "loss": 0.0899, + "num_input_tokens_seen": 709391496, + "step": 7316 + }, + { + "epoch": 0.7152913570590536, + "loss": 0.08908188343048096, + "loss_ce": 0.006775970570743084, + "loss_iou": 0.283203125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 709391496, + "step": 7316 + }, + { + "epoch": 0.7153891278842394, + "grad_norm": 11.022034934824807, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 709488628, + "step": 7317 + }, + { + "epoch": 0.7153891278842394, + "loss": 0.07657719403505325, + "loss_ce": 0.007897388190031052, + "loss_iou": 0.2236328125, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 709488628, + "step": 7317 + }, + { + "epoch": 0.7154868987094251, + "grad_norm": 18.09799786354715, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 709585320, + "step": 7318 + }, + { + "epoch": 0.7154868987094251, + "loss": 0.06347465515136719, + "loss_ce": 0.005430220626294613, + "loss_iou": 0.3046875, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 709585320, + "step": 7318 + }, + { + "epoch": 0.7155846695346109, + "grad_norm": 16.500003811228034, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 709682952, + "step": 7319 + }, + { + "epoch": 0.7155846695346109, + "loss": 0.05785655602812767, + "loss_ce": 0.005381579045206308, + "loss_iou": 0.330078125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 709682952, + "step": 7319 + }, + { + "epoch": 0.7156824403597967, + "grad_norm": 10.149676536444115, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 709780540, + "step": 7320 + }, + { + "epoch": 0.7156824403597967, + "loss": 0.06788553297519684, + "loss_ce": 0.0056754546239972115, + "loss_iou": 0.28125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 709780540, + "step": 7320 + }, + { + "epoch": 0.7157802111849824, + "grad_norm": 6.18657726847316, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 709876864, + "step": 7321 + }, + { + "epoch": 0.7157802111849824, + "loss": 0.08588439226150513, + "loss_ce": 0.004318534396588802, + "loss_iou": 0.244140625, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 709876864, + "step": 7321 + }, + { + "epoch": 0.7158779820101682, + "grad_norm": 3.9314076928548687, + "learning_rate": 5e-05, + "loss": 0.1029, + "num_input_tokens_seen": 709973760, + "step": 7322 + }, + { + "epoch": 0.7158779820101682, + "loss": 0.11120596528053284, + "loss_ce": 0.0036009822506457567, + "loss_iou": 0.25, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 709973760, + "step": 7322 + }, + { + "epoch": 0.715975752835354, + "grad_norm": 3.060558847103793, + "learning_rate": 5e-05, + "loss": 0.1069, + "num_input_tokens_seen": 710070516, + "step": 7323 + }, + { + "epoch": 0.715975752835354, + "loss": 0.07229544222354889, + "loss_ce": 0.00981069728732109, + "loss_iou": 0.2431640625, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 710070516, + "step": 7323 + }, + { + "epoch": 0.7160735236605397, + "grad_norm": 2.103741769975449, + "learning_rate": 5e-05, + "loss": 0.0655, + "num_input_tokens_seen": 710167104, + "step": 7324 + }, + { + "epoch": 0.7160735236605397, + "loss": 0.055384062230587006, + "loss_ce": 0.005182648543268442, + "loss_iou": 0.291015625, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 710167104, + "step": 7324 + }, + { + "epoch": 0.7161712944857255, + "grad_norm": 11.504602879592799, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 710263460, + "step": 7325 + }, + { + "epoch": 0.7161712944857255, + "loss": 0.0889536589384079, + "loss_ce": 0.010195422917604446, + "loss_iou": 0.27734375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 710263460, + "step": 7325 + }, + { + "epoch": 0.7162690653109113, + "grad_norm": 12.256512245514749, + "learning_rate": 5e-05, + "loss": 0.058, + "num_input_tokens_seen": 710359432, + "step": 7326 + }, + { + "epoch": 0.7162690653109113, + "loss": 0.05724278464913368, + "loss_ce": 0.005755817051976919, + "loss_iou": 0.326171875, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 710359432, + "step": 7326 + }, + { + "epoch": 0.716366836136097, + "grad_norm": 15.057327979655717, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 710456008, + "step": 7327 + }, + { + "epoch": 0.716366836136097, + "loss": 0.10907921195030212, + "loss_ce": 0.0033129118382930756, + "loss_iou": 0.2255859375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 710456008, + "step": 7327 + }, + { + "epoch": 0.7164646069612828, + "grad_norm": 8.243465169968262, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 710553164, + "step": 7328 + }, + { + "epoch": 0.7164646069612828, + "loss": 0.10295513272285461, + "loss_ce": 0.007175716571509838, + "loss_iou": 0.294921875, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 710553164, + "step": 7328 + }, + { + "epoch": 0.7165623777864685, + "grad_norm": 30.78821761145266, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 710649632, + "step": 7329 + }, + { + "epoch": 0.7165623777864685, + "loss": 0.07071191072463989, + "loss_ce": 0.00428277300670743, + "loss_iou": 0.1474609375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 710649632, + "step": 7329 + }, + { + "epoch": 0.7166601486116543, + "grad_norm": 8.850875308068037, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 710746144, + "step": 7330 + }, + { + "epoch": 0.7166601486116543, + "loss": 0.10340568423271179, + "loss_ce": 0.007717818953096867, + "loss_iou": 0.23046875, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 710746144, + "step": 7330 + }, + { + "epoch": 0.7167579194368401, + "grad_norm": 4.036605417976641, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 710843492, + "step": 7331 + }, + { + "epoch": 0.7167579194368401, + "loss": 0.08903154730796814, + "loss_ce": 0.002514221938326955, + "loss_iou": 0.2734375, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 710843492, + "step": 7331 + }, + { + "epoch": 0.7168556902620258, + "grad_norm": 5.417776390275588, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 710940008, + "step": 7332 + }, + { + "epoch": 0.7168556902620258, + "loss": 0.05489538237452507, + "loss_ce": 0.005311945453286171, + "loss_iou": 0.2236328125, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 710940008, + "step": 7332 + }, + { + "epoch": 0.7169534610872116, + "grad_norm": 5.817475441836474, + "learning_rate": 5e-05, + "loss": 0.0591, + "num_input_tokens_seen": 711037168, + "step": 7333 + }, + { + "epoch": 0.7169534610872116, + "loss": 0.05231112241744995, + "loss_ce": 0.006404103711247444, + "loss_iou": 0.23828125, + "loss_num": 0.00921630859375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 711037168, + "step": 7333 + }, + { + "epoch": 0.7170512319123974, + "grad_norm": 8.285093548536924, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 711133104, + "step": 7334 + }, + { + "epoch": 0.7170512319123974, + "loss": 0.07953833043575287, + "loss_ce": 0.00953100435435772, + "loss_iou": 0.275390625, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 711133104, + "step": 7334 + }, + { + "epoch": 0.7171490027375831, + "grad_norm": 6.68029887361298, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 711230688, + "step": 7335 + }, + { + "epoch": 0.7171490027375831, + "loss": 0.06227904558181763, + "loss_ce": 0.0011523328721523285, + "loss_iou": 0.330078125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 711230688, + "step": 7335 + }, + { + "epoch": 0.7172467735627689, + "grad_norm": 5.158486759554273, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 711328484, + "step": 7336 + }, + { + "epoch": 0.7172467735627689, + "loss": 0.07915250957012177, + "loss_ce": 0.004567550029605627, + "loss_iou": 0.345703125, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 711328484, + "step": 7336 + }, + { + "epoch": 0.7173445443879546, + "grad_norm": 5.563892441554406, + "learning_rate": 5e-05, + "loss": 0.0889, + "num_input_tokens_seen": 711425544, + "step": 7337 + }, + { + "epoch": 0.7173445443879546, + "loss": 0.08039447665214539, + "loss_ce": 0.005916325841099024, + "loss_iou": 0.31640625, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 711425544, + "step": 7337 + }, + { + "epoch": 0.7174423152131404, + "grad_norm": 7.2337660522799245, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 711522440, + "step": 7338 + }, + { + "epoch": 0.7174423152131404, + "loss": 0.08007006347179413, + "loss_ce": 0.005652945023030043, + "loss_iou": 0.25390625, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 711522440, + "step": 7338 + }, + { + "epoch": 0.7175400860383262, + "grad_norm": 16.716724752208993, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 711619504, + "step": 7339 + }, + { + "epoch": 0.7175400860383262, + "loss": 0.12706606090068817, + "loss_ce": 0.003347797319293022, + "loss_iou": 0.29296875, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 711619504, + "step": 7339 + }, + { + "epoch": 0.7176378568635119, + "grad_norm": 3.6187333925691316, + "learning_rate": 5e-05, + "loss": 0.0554, + "num_input_tokens_seen": 711716140, + "step": 7340 + }, + { + "epoch": 0.7176378568635119, + "loss": 0.055105745792388916, + "loss_ce": 0.006887969560921192, + "loss_iou": 0.33984375, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 711716140, + "step": 7340 + }, + { + "epoch": 0.7177356276886977, + "grad_norm": 10.910145746993312, + "learning_rate": 5e-05, + "loss": 0.0854, + "num_input_tokens_seen": 711813072, + "step": 7341 + }, + { + "epoch": 0.7177356276886977, + "loss": 0.0957624614238739, + "loss_ce": 0.0027753994800150394, + "loss_iou": 0.2421875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 711813072, + "step": 7341 + }, + { + "epoch": 0.7178333985138835, + "grad_norm": 1.7349544028467476, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 711909732, + "step": 7342 + }, + { + "epoch": 0.7178333985138835, + "loss": 0.06740503013134003, + "loss_ce": 0.007628720253705978, + "loss_iou": 0.265625, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 711909732, + "step": 7342 + }, + { + "epoch": 0.7179311693390692, + "grad_norm": 1.7710787883438457, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 712006604, + "step": 7343 + }, + { + "epoch": 0.7179311693390692, + "loss": 0.05730810388922691, + "loss_ce": 0.003887079656124115, + "loss_iou": 0.306640625, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 712006604, + "step": 7343 + }, + { + "epoch": 0.718028940164255, + "grad_norm": 10.360213884139904, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 712103392, + "step": 7344 + }, + { + "epoch": 0.718028940164255, + "loss": 0.10612647235393524, + "loss_ce": 0.0056931255385279655, + "loss_iou": 0.279296875, + "loss_num": 0.02001953125, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 712103392, + "step": 7344 + }, + { + "epoch": 0.7181267109894407, + "grad_norm": 3.525398493750357, + "learning_rate": 5e-05, + "loss": 0.0851, + "num_input_tokens_seen": 712200652, + "step": 7345 + }, + { + "epoch": 0.7181267109894407, + "loss": 0.07958526909351349, + "loss_ce": 0.00547333387658, + "loss_iou": 0.26171875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 712200652, + "step": 7345 + }, + { + "epoch": 0.7182244818146265, + "grad_norm": 10.621038559552128, + "learning_rate": 5e-05, + "loss": 0.0773, + "num_input_tokens_seen": 712298192, + "step": 7346 + }, + { + "epoch": 0.7182244818146265, + "loss": 0.07695396989583969, + "loss_ce": 0.0058632707223296165, + "loss_iou": 0.306640625, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 712298192, + "step": 7346 + }, + { + "epoch": 0.7183222526398123, + "grad_norm": 7.6232786647772155, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 712395824, + "step": 7347 + }, + { + "epoch": 0.7183222526398123, + "loss": 0.1040928065776825, + "loss_ce": 0.006467076949775219, + "loss_iou": 0.310546875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 712395824, + "step": 7347 + }, + { + "epoch": 0.718420023464998, + "grad_norm": 51.92214495517686, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 712492656, + "step": 7348 + }, + { + "epoch": 0.718420023464998, + "loss": 0.06807916611433029, + "loss_ce": 0.007425482384860516, + "loss_iou": 0.2451171875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 712492656, + "step": 7348 + }, + { + "epoch": 0.7185177942901838, + "grad_norm": 5.228778135317699, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 712589924, + "step": 7349 + }, + { + "epoch": 0.7185177942901838, + "loss": 0.0820956900715828, + "loss_ce": 0.010333606973290443, + "loss_iou": 0.1708984375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 712589924, + "step": 7349 + }, + { + "epoch": 0.7186155651153696, + "grad_norm": 6.341622866718869, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 712687036, + "step": 7350 + }, + { + "epoch": 0.7186155651153696, + "loss": 0.08644676208496094, + "loss_ce": 0.004858019296079874, + "loss_iou": 0.287109375, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 712687036, + "step": 7350 + }, + { + "epoch": 0.7187133359405553, + "grad_norm": 3.457792739296696, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 712783872, + "step": 7351 + }, + { + "epoch": 0.7187133359405553, + "loss": 0.08934086561203003, + "loss_ce": 0.006806081160902977, + "loss_iou": 0.3203125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 712783872, + "step": 7351 + }, + { + "epoch": 0.7188111067657411, + "grad_norm": 3.6262416037861667, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 712880876, + "step": 7352 + }, + { + "epoch": 0.7188111067657411, + "loss": 0.09434216469526291, + "loss_ce": 0.006146362517029047, + "loss_iou": 0.2197265625, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 712880876, + "step": 7352 + }, + { + "epoch": 0.7189088775909269, + "grad_norm": 4.485979066936401, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 712977492, + "step": 7353 + }, + { + "epoch": 0.7189088775909269, + "loss": 0.056273750960826874, + "loss_ce": 0.010566048324108124, + "loss_iou": 0.22265625, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 712977492, + "step": 7353 + }, + { + "epoch": 0.7190066484161126, + "grad_norm": 8.682531899381207, + "learning_rate": 5e-05, + "loss": 0.0881, + "num_input_tokens_seen": 713074128, + "step": 7354 + }, + { + "epoch": 0.7190066484161126, + "loss": 0.04783317446708679, + "loss_ce": 0.004143443889915943, + "loss_iou": 0.15625, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 713074128, + "step": 7354 + }, + { + "epoch": 0.7191044192412984, + "grad_norm": 1.9256228698493867, + "learning_rate": 5e-05, + "loss": 0.0597, + "num_input_tokens_seen": 713171888, + "step": 7355 + }, + { + "epoch": 0.7191044192412984, + "loss": 0.03900906816124916, + "loss_ce": 0.005546545144170523, + "loss_iou": 0.283203125, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 713171888, + "step": 7355 + }, + { + "epoch": 0.7192021900664841, + "grad_norm": 8.310189980851893, + "learning_rate": 5e-05, + "loss": 0.106, + "num_input_tokens_seen": 713269464, + "step": 7356 + }, + { + "epoch": 0.7192021900664841, + "loss": 0.13472142815589905, + "loss_ce": 0.005418441258370876, + "loss_iou": 0.279296875, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 713269464, + "step": 7356 + }, + { + "epoch": 0.7192999608916699, + "grad_norm": 4.7568805010702215, + "learning_rate": 5e-05, + "loss": 0.0925, + "num_input_tokens_seen": 713366624, + "step": 7357 + }, + { + "epoch": 0.7192999608916699, + "loss": 0.10591883212327957, + "loss_ce": 0.005978532135486603, + "loss_iou": 0.341796875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 713366624, + "step": 7357 + }, + { + "epoch": 0.7193977317168557, + "grad_norm": 3.5872150152336597, + "learning_rate": 5e-05, + "loss": 0.083, + "num_input_tokens_seen": 713464060, + "step": 7358 + }, + { + "epoch": 0.7193977317168557, + "loss": 0.08708801865577698, + "loss_ce": 0.005690013058483601, + "loss_iou": 0.2890625, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 713464060, + "step": 7358 + }, + { + "epoch": 0.7194955025420414, + "grad_norm": 14.937932804695667, + "learning_rate": 5e-05, + "loss": 0.0619, + "num_input_tokens_seen": 713560432, + "step": 7359 + }, + { + "epoch": 0.7194955025420414, + "loss": 0.06662984192371368, + "loss_ce": 0.007578329648822546, + "loss_iou": 0.23046875, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 713560432, + "step": 7359 + }, + { + "epoch": 0.7195932733672272, + "grad_norm": 30.841323544868562, + "learning_rate": 5e-05, + "loss": 0.0997, + "num_input_tokens_seen": 713657728, + "step": 7360 + }, + { + "epoch": 0.7195932733672272, + "loss": 0.10757029801607132, + "loss_ce": 0.00457347696647048, + "loss_iou": 0.326171875, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 713657728, + "step": 7360 + }, + { + "epoch": 0.719691044192413, + "grad_norm": 7.354487587030459, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 713755664, + "step": 7361 + }, + { + "epoch": 0.719691044192413, + "loss": 0.0974479392170906, + "loss_ce": 0.004369329195469618, + "loss_iou": 0.40234375, + "loss_num": 0.0185546875, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 713755664, + "step": 7361 + }, + { + "epoch": 0.7197888150175987, + "grad_norm": 23.301676487348363, + "learning_rate": 5e-05, + "loss": 0.0954, + "num_input_tokens_seen": 713852356, + "step": 7362 + }, + { + "epoch": 0.7197888150175987, + "loss": 0.13641561567783356, + "loss_ce": 0.012544771656394005, + "loss_iou": 0.3359375, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 713852356, + "step": 7362 + }, + { + "epoch": 0.7198865858427845, + "grad_norm": 4.867044508378819, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 713948328, + "step": 7363 + }, + { + "epoch": 0.7198865858427845, + "loss": 0.08799147605895996, + "loss_ce": 0.0022675981745123863, + "loss_iou": 0.341796875, + "loss_num": 0.01708984375, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 713948328, + "step": 7363 + }, + { + "epoch": 0.7199843566679702, + "grad_norm": 10.105243129651498, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 714044664, + "step": 7364 + }, + { + "epoch": 0.7199843566679702, + "loss": 0.09697956591844559, + "loss_ce": 0.0059914058074355125, + "loss_iou": 0.283203125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 714044664, + "step": 7364 + }, + { + "epoch": 0.720082127493156, + "grad_norm": 7.7304270193165, + "learning_rate": 5e-05, + "loss": 0.0812, + "num_input_tokens_seen": 714141544, + "step": 7365 + }, + { + "epoch": 0.720082127493156, + "loss": 0.059625640511512756, + "loss_ce": 0.0035801075864583254, + "loss_iou": 0.29296875, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 714141544, + "step": 7365 + }, + { + "epoch": 0.7201798983183418, + "grad_norm": 8.100863391476066, + "learning_rate": 5e-05, + "loss": 0.1147, + "num_input_tokens_seen": 714238272, + "step": 7366 + }, + { + "epoch": 0.7201798983183418, + "loss": 0.07781898975372314, + "loss_ce": 0.004714124836027622, + "loss_iou": 0.357421875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 714238272, + "step": 7366 + }, + { + "epoch": 0.7202776691435275, + "grad_norm": 13.504626301436815, + "learning_rate": 5e-05, + "loss": 0.0775, + "num_input_tokens_seen": 714335204, + "step": 7367 + }, + { + "epoch": 0.7202776691435275, + "loss": 0.07726272940635681, + "loss_ce": 0.00433335080742836, + "loss_iou": 0.27734375, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 714335204, + "step": 7367 + }, + { + "epoch": 0.7203754399687133, + "grad_norm": 14.725746479251953, + "learning_rate": 5e-05, + "loss": 0.1173, + "num_input_tokens_seen": 714431872, + "step": 7368 + }, + { + "epoch": 0.7203754399687133, + "loss": 0.0939815491437912, + "loss_ce": 0.007845691405236721, + "loss_iou": 0.35546875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 714431872, + "step": 7368 + }, + { + "epoch": 0.7204732107938991, + "grad_norm": 10.70943066782381, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 714529524, + "step": 7369 + }, + { + "epoch": 0.7204732107938991, + "loss": 0.05794323608279228, + "loss_ce": 0.004506956785917282, + "loss_iou": 0.416015625, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 714529524, + "step": 7369 + }, + { + "epoch": 0.7205709816190848, + "grad_norm": 6.017602730615195, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 714627044, + "step": 7370 + }, + { + "epoch": 0.7205709816190848, + "loss": 0.07533399760723114, + "loss_ce": 0.0033430340699851513, + "loss_iou": 0.294921875, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 714627044, + "step": 7370 + }, + { + "epoch": 0.7206687524442706, + "grad_norm": 6.628064558556241, + "learning_rate": 5e-05, + "loss": 0.1236, + "num_input_tokens_seen": 714724536, + "step": 7371 + }, + { + "epoch": 0.7206687524442706, + "loss": 0.14090242981910706, + "loss_ce": 0.00821200292557478, + "loss_iou": 0.33203125, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 714724536, + "step": 7371 + }, + { + "epoch": 0.7207665232694564, + "grad_norm": 5.628163750394549, + "learning_rate": 5e-05, + "loss": 0.0471, + "num_input_tokens_seen": 714820204, + "step": 7372 + }, + { + "epoch": 0.7207665232694564, + "loss": 0.05930129438638687, + "loss_ce": 0.0032481318339705467, + "loss_iou": 0.349609375, + "loss_num": 0.01116943359375, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 714820204, + "step": 7372 + }, + { + "epoch": 0.7208642940946421, + "grad_norm": 17.65132199822234, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 714916300, + "step": 7373 + }, + { + "epoch": 0.7208642940946421, + "loss": 0.10366739332675934, + "loss_ce": 0.004088541958481073, + "loss_iou": 0.2109375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 714916300, + "step": 7373 + }, + { + "epoch": 0.720962064919828, + "grad_norm": 14.694917946693977, + "learning_rate": 5e-05, + "loss": 0.0534, + "num_input_tokens_seen": 715013476, + "step": 7374 + }, + { + "epoch": 0.720962064919828, + "loss": 0.05787103250622749, + "loss_ce": 0.007028746884316206, + "loss_iou": 0.2890625, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 715013476, + "step": 7374 + }, + { + "epoch": 0.7210598357450136, + "grad_norm": 6.773560619414422, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 715111440, + "step": 7375 + }, + { + "epoch": 0.7210598357450136, + "loss": 0.08863116055727005, + "loss_ce": 0.0084919985383749, + "loss_iou": 0.25390625, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 715111440, + "step": 7375 + }, + { + "epoch": 0.7211576065701994, + "grad_norm": 7.789960790862997, + "learning_rate": 5e-05, + "loss": 0.0497, + "num_input_tokens_seen": 715207396, + "step": 7376 + }, + { + "epoch": 0.7211576065701994, + "loss": 0.04301717132329941, + "loss_ce": 0.004404805134981871, + "loss_iou": 0.1865234375, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 715207396, + "step": 7376 + }, + { + "epoch": 0.7212553773953853, + "grad_norm": 7.604444851427509, + "learning_rate": 5e-05, + "loss": 0.1085, + "num_input_tokens_seen": 715303708, + "step": 7377 + }, + { + "epoch": 0.7212553773953853, + "loss": 0.13523562252521515, + "loss_ce": 0.008305389434099197, + "loss_iou": 0.2373046875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 715303708, + "step": 7377 + }, + { + "epoch": 0.721353148220571, + "grad_norm": 10.983634771497833, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 715400484, + "step": 7378 + }, + { + "epoch": 0.721353148220571, + "loss": 0.05727732554078102, + "loss_ce": 0.008824948221445084, + "loss_iou": 0.2060546875, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 715400484, + "step": 7378 + }, + { + "epoch": 0.7214509190457568, + "grad_norm": 15.553861128183412, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 715497776, + "step": 7379 + }, + { + "epoch": 0.7214509190457568, + "loss": 0.06612738966941833, + "loss_ce": 0.009578313678503036, + "loss_iou": 0.263671875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 715497776, + "step": 7379 + }, + { + "epoch": 0.7215486898709426, + "grad_norm": 45.61595096748246, + "learning_rate": 5e-05, + "loss": 0.1077, + "num_input_tokens_seen": 715595412, + "step": 7380 + }, + { + "epoch": 0.7215486898709426, + "loss": 0.07884016633033752, + "loss_ce": 0.0034922678023576736, + "loss_iou": 0.28125, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 715595412, + "step": 7380 + }, + { + "epoch": 0.7216464606961283, + "grad_norm": 15.363673936300913, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 715692492, + "step": 7381 + }, + { + "epoch": 0.7216464606961283, + "loss": 0.10917401313781738, + "loss_ce": 0.004834413528442383, + "loss_iou": 0.37890625, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 715692492, + "step": 7381 + }, + { + "epoch": 0.7217442315213141, + "grad_norm": 16.71443744329, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 715790196, + "step": 7382 + }, + { + "epoch": 0.7217442315213141, + "loss": 0.12625247240066528, + "loss_ce": 0.01129275094717741, + "loss_iou": 0.287109375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 715790196, + "step": 7382 + }, + { + "epoch": 0.7218420023464998, + "grad_norm": 3.5396901816597564, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 715887636, + "step": 7383 + }, + { + "epoch": 0.7218420023464998, + "loss": 0.0503372885286808, + "loss_ce": 0.009603951126337051, + "loss_iou": 0.2578125, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 715887636, + "step": 7383 + }, + { + "epoch": 0.7219397731716856, + "grad_norm": 7.596932947614922, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 715985184, + "step": 7384 + }, + { + "epoch": 0.7219397731716856, + "loss": 0.07476850599050522, + "loss_ce": 0.009399855509400368, + "loss_iou": 0.259765625, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 715985184, + "step": 7384 + }, + { + "epoch": 0.7220375439968714, + "grad_norm": 4.395816045503554, + "learning_rate": 5e-05, + "loss": 0.1075, + "num_input_tokens_seen": 716080900, + "step": 7385 + }, + { + "epoch": 0.7220375439968714, + "loss": 0.0981297716498375, + "loss_ce": 0.005981937982141972, + "loss_iou": 0.158203125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 716080900, + "step": 7385 + }, + { + "epoch": 0.7221353148220571, + "grad_norm": 1.965198593698395, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 716178152, + "step": 7386 + }, + { + "epoch": 0.7221353148220571, + "loss": 0.0792963057756424, + "loss_ce": 0.00688572321087122, + "loss_iou": 0.294921875, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 716178152, + "step": 7386 + }, + { + "epoch": 0.7222330856472429, + "grad_norm": 2.754702896038894, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 716274840, + "step": 7387 + }, + { + "epoch": 0.7222330856472429, + "loss": 0.0928487628698349, + "loss_ce": 0.0041494243778288364, + "loss_iou": 0.1552734375, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 716274840, + "step": 7387 + }, + { + "epoch": 0.7223308564724287, + "grad_norm": 13.506629784792981, + "learning_rate": 5e-05, + "loss": 0.048, + "num_input_tokens_seen": 716372544, + "step": 7388 + }, + { + "epoch": 0.7223308564724287, + "loss": 0.03295426070690155, + "loss_ce": 0.0053663719445466995, + "loss_iou": 0.2734375, + "loss_num": 0.005523681640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 716372544, + "step": 7388 + }, + { + "epoch": 0.7224286272976144, + "grad_norm": 27.11157406779715, + "learning_rate": 5e-05, + "loss": 0.1057, + "num_input_tokens_seen": 716469328, + "step": 7389 + }, + { + "epoch": 0.7224286272976144, + "loss": 0.08644954860210419, + "loss_ce": 0.007164875045418739, + "loss_iou": 0.326171875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 716469328, + "step": 7389 + }, + { + "epoch": 0.7225263981228002, + "grad_norm": 13.797856655078082, + "learning_rate": 5e-05, + "loss": 0.1112, + "num_input_tokens_seen": 716565660, + "step": 7390 + }, + { + "epoch": 0.7225263981228002, + "loss": 0.13007798790931702, + "loss_ce": 0.006146100349724293, + "loss_iou": 0.310546875, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 716565660, + "step": 7390 + }, + { + "epoch": 0.7226241689479859, + "grad_norm": 2.767017892934051, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 716662776, + "step": 7391 + }, + { + "epoch": 0.7226241689479859, + "loss": 0.077122762799263, + "loss_ce": 0.0033465144224464893, + "loss_iou": 0.2890625, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 716662776, + "step": 7391 + }, + { + "epoch": 0.7227219397731717, + "grad_norm": 5.895905828523519, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 716759308, + "step": 7392 + }, + { + "epoch": 0.7227219397731717, + "loss": 0.0703265517950058, + "loss_ce": 0.011862500570714474, + "loss_iou": 0.25, + "loss_num": 0.01171875, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 716759308, + "step": 7392 + }, + { + "epoch": 0.7228197105983575, + "grad_norm": 24.30121532486036, + "learning_rate": 5e-05, + "loss": 0.0677, + "num_input_tokens_seen": 716856072, + "step": 7393 + }, + { + "epoch": 0.7228197105983575, + "loss": 0.06475187093019485, + "loss_ce": 0.0015194497536867857, + "loss_iou": 0.296875, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 716856072, + "step": 7393 + }, + { + "epoch": 0.7229174814235432, + "grad_norm": 8.331804347713938, + "learning_rate": 5e-05, + "loss": 0.1003, + "num_input_tokens_seen": 716953624, + "step": 7394 + }, + { + "epoch": 0.7229174814235432, + "loss": 0.08954861760139465, + "loss_ce": 0.005686315707862377, + "loss_iou": 0.2373046875, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 716953624, + "step": 7394 + }, + { + "epoch": 0.723015252248729, + "grad_norm": 9.60165946603922, + "learning_rate": 5e-05, + "loss": 0.0485, + "num_input_tokens_seen": 717050392, + "step": 7395 + }, + { + "epoch": 0.723015252248729, + "loss": 0.0640178769826889, + "loss_ce": 0.0030819042585790157, + "loss_iou": 0.259765625, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 717050392, + "step": 7395 + }, + { + "epoch": 0.7231130230739148, + "grad_norm": 4.043252823620926, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 717147584, + "step": 7396 + }, + { + "epoch": 0.7231130230739148, + "loss": 0.11600588262081146, + "loss_ce": 0.005742059554904699, + "loss_iou": 0.2197265625, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 717147584, + "step": 7396 + }, + { + "epoch": 0.7232107938991005, + "grad_norm": 5.525274394528271, + "learning_rate": 5e-05, + "loss": 0.09, + "num_input_tokens_seen": 717244760, + "step": 7397 + }, + { + "epoch": 0.7232107938991005, + "loss": 0.09390975534915924, + "loss_ce": 0.006934659089893103, + "loss_iou": 0.1669921875, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 717244760, + "step": 7397 + }, + { + "epoch": 0.7233085647242863, + "grad_norm": 16.288466322778746, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 717341408, + "step": 7398 + }, + { + "epoch": 0.7233085647242863, + "loss": 0.06906712055206299, + "loss_ce": 0.0025731297209858894, + "loss_iou": 0.263671875, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 717341408, + "step": 7398 + }, + { + "epoch": 0.7234063355494721, + "grad_norm": 18.1452519468345, + "learning_rate": 5e-05, + "loss": 0.0997, + "num_input_tokens_seen": 717438112, + "step": 7399 + }, + { + "epoch": 0.7234063355494721, + "loss": 0.156034916639328, + "loss_ce": 0.008848646655678749, + "loss_iou": 0.26953125, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 717438112, + "step": 7399 + }, + { + "epoch": 0.7235041063746578, + "grad_norm": 7.867904513182147, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 717534784, + "step": 7400 + }, + { + "epoch": 0.7235041063746578, + "loss": 0.07193787395954132, + "loss_ce": 0.007725072093307972, + "loss_iou": 0.265625, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 717534784, + "step": 7400 + }, + { + "epoch": 0.7236018771998436, + "grad_norm": 3.665116630797373, + "learning_rate": 5e-05, + "loss": 0.0578, + "num_input_tokens_seen": 717631464, + "step": 7401 + }, + { + "epoch": 0.7236018771998436, + "loss": 0.05625119060277939, + "loss_ce": 0.003654144238680601, + "loss_iou": 0.287109375, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 717631464, + "step": 7401 + }, + { + "epoch": 0.7236996480250293, + "grad_norm": 10.597490424055053, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 717729332, + "step": 7402 + }, + { + "epoch": 0.7236996480250293, + "loss": 0.10704369097948074, + "loss_ce": 0.003329694736748934, + "loss_iou": 0.2412109375, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 717729332, + "step": 7402 + }, + { + "epoch": 0.7237974188502151, + "grad_norm": 14.373418872154643, + "learning_rate": 5e-05, + "loss": 0.0717, + "num_input_tokens_seen": 717825364, + "step": 7403 + }, + { + "epoch": 0.7237974188502151, + "loss": 0.08841833472251892, + "loss_ce": 0.004403434228152037, + "loss_iou": 0.37890625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 717825364, + "step": 7403 + }, + { + "epoch": 0.7238951896754009, + "grad_norm": 5.206420448488197, + "learning_rate": 5e-05, + "loss": 0.083, + "num_input_tokens_seen": 717922436, + "step": 7404 + }, + { + "epoch": 0.7238951896754009, + "loss": 0.08925291895866394, + "loss_ce": 0.002773731481283903, + "loss_iou": 0.40234375, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 717922436, + "step": 7404 + }, + { + "epoch": 0.7239929605005866, + "grad_norm": 10.379643057212071, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 718019656, + "step": 7405 + }, + { + "epoch": 0.7239929605005866, + "loss": 0.06579715013504028, + "loss_ce": 0.005227384623140097, + "loss_iou": 0.294921875, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 718019656, + "step": 7405 + }, + { + "epoch": 0.7240907313257724, + "grad_norm": 5.980345953629045, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 718116260, + "step": 7406 + }, + { + "epoch": 0.7240907313257724, + "loss": 0.06366018205881119, + "loss_ce": 0.0037236593198031187, + "loss_iou": 0.3203125, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 718116260, + "step": 7406 + }, + { + "epoch": 0.7241885021509582, + "grad_norm": 16.842671412494266, + "learning_rate": 5e-05, + "loss": 0.1143, + "num_input_tokens_seen": 718213556, + "step": 7407 + }, + { + "epoch": 0.7241885021509582, + "loss": 0.12510456144809723, + "loss_ce": 0.008695254102349281, + "loss_iou": 0.31640625, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 718213556, + "step": 7407 + }, + { + "epoch": 0.7242862729761439, + "grad_norm": 6.4700629579317015, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 718310196, + "step": 7408 + }, + { + "epoch": 0.7242862729761439, + "loss": 0.06513027846813202, + "loss_ce": 0.005193756427615881, + "loss_iou": 0.30859375, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 718310196, + "step": 7408 + }, + { + "epoch": 0.7243840438013297, + "grad_norm": 2.6920405751403007, + "learning_rate": 5e-05, + "loss": 0.0542, + "num_input_tokens_seen": 718405976, + "step": 7409 + }, + { + "epoch": 0.7243840438013297, + "loss": 0.04981721192598343, + "loss_ce": 0.004803784191608429, + "loss_iou": 0.248046875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 718405976, + "step": 7409 + }, + { + "epoch": 0.7244818146265154, + "grad_norm": 7.767896710875024, + "learning_rate": 5e-05, + "loss": 0.0863, + "num_input_tokens_seen": 718502872, + "step": 7410 + }, + { + "epoch": 0.7244818146265154, + "loss": 0.08215382695198059, + "loss_ce": 0.004875693004578352, + "loss_iou": 0.265625, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 718502872, + "step": 7410 + }, + { + "epoch": 0.7245795854517012, + "grad_norm": 7.400336712585531, + "learning_rate": 5e-05, + "loss": 0.0558, + "num_input_tokens_seen": 718600540, + "step": 7411 + }, + { + "epoch": 0.7245795854517012, + "loss": 0.04504556953907013, + "loss_ce": 0.0032135972287505865, + "loss_iou": 0.28515625, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 718600540, + "step": 7411 + }, + { + "epoch": 0.724677356276887, + "grad_norm": 10.141185031158582, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 718697064, + "step": 7412 + }, + { + "epoch": 0.724677356276887, + "loss": 0.06257905066013336, + "loss_ce": 0.007540592923760414, + "loss_iou": 0.333984375, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 718697064, + "step": 7412 + }, + { + "epoch": 0.7247751271020727, + "grad_norm": 7.974808054509292, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 718794060, + "step": 7413 + }, + { + "epoch": 0.7247751271020727, + "loss": 0.10591799765825272, + "loss_ce": 0.006506987381726503, + "loss_iou": 0.302734375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 718794060, + "step": 7413 + }, + { + "epoch": 0.7248728979272585, + "grad_norm": 9.955720748493079, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 718891224, + "step": 7414 + }, + { + "epoch": 0.7248728979272585, + "loss": 0.10300449281930923, + "loss_ce": 0.004966775421053171, + "loss_iou": 0.353515625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 718891224, + "step": 7414 + }, + { + "epoch": 0.7249706687524443, + "grad_norm": 12.644630830183171, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 718987540, + "step": 7415 + }, + { + "epoch": 0.7249706687524443, + "loss": 0.10037562996149063, + "loss_ce": 0.003711194731295109, + "loss_iou": 0.291015625, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 718987540, + "step": 7415 + }, + { + "epoch": 0.72506843957763, + "grad_norm": 6.302656553304143, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 719083880, + "step": 7416 + }, + { + "epoch": 0.72506843957763, + "loss": 0.06561977416276932, + "loss_ce": 0.005309411324560642, + "loss_iou": 0.271484375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 719083880, + "step": 7416 + }, + { + "epoch": 0.7251662104028158, + "grad_norm": 6.129790340253924, + "learning_rate": 5e-05, + "loss": 0.078, + "num_input_tokens_seen": 719181204, + "step": 7417 + }, + { + "epoch": 0.7251662104028158, + "loss": 0.06367965787649155, + "loss_ce": 0.008076629601418972, + "loss_iou": 0.34375, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 719181204, + "step": 7417 + }, + { + "epoch": 0.7252639812280016, + "grad_norm": 5.262047688404738, + "learning_rate": 5e-05, + "loss": 0.0892, + "num_input_tokens_seen": 719277044, + "step": 7418 + }, + { + "epoch": 0.7252639812280016, + "loss": 0.0735086053609848, + "loss_ce": 0.004821161739528179, + "loss_iou": 0.2109375, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 719277044, + "step": 7418 + }, + { + "epoch": 0.7253617520531873, + "grad_norm": 2.8526181087289206, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 719374500, + "step": 7419 + }, + { + "epoch": 0.7253617520531873, + "loss": 0.07233580946922302, + "loss_ce": 0.009049981832504272, + "loss_iou": 0.296875, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 719374500, + "step": 7419 + }, + { + "epoch": 0.7254595228783731, + "grad_norm": 6.119723886868096, + "learning_rate": 5e-05, + "loss": 0.0575, + "num_input_tokens_seen": 719471988, + "step": 7420 + }, + { + "epoch": 0.7254595228783731, + "loss": 0.05257592722773552, + "loss_ce": 0.004777771886438131, + "loss_iou": 0.318359375, + "loss_num": 0.009521484375, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 719471988, + "step": 7420 + }, + { + "epoch": 0.7255572937035588, + "grad_norm": 7.859377940562871, + "learning_rate": 5e-05, + "loss": 0.0492, + "num_input_tokens_seen": 719568992, + "step": 7421 + }, + { + "epoch": 0.7255572937035588, + "loss": 0.0317387729883194, + "loss_ce": 0.001053348183631897, + "loss_iou": 0.326171875, + "loss_num": 0.006134033203125, + "loss_xval": 0.0306396484375, + "num_input_tokens_seen": 719568992, + "step": 7421 + }, + { + "epoch": 0.7256550645287446, + "grad_norm": 27.89954178407209, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 719666092, + "step": 7422 + }, + { + "epoch": 0.7256550645287446, + "loss": 0.06706508249044418, + "loss_ce": 0.003561819903552532, + "loss_iou": 0.203125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 719666092, + "step": 7422 + }, + { + "epoch": 0.7257528353539304, + "grad_norm": 13.444341807324369, + "learning_rate": 5e-05, + "loss": 0.0875, + "num_input_tokens_seen": 719761604, + "step": 7423 + }, + { + "epoch": 0.7257528353539304, + "loss": 0.08507955074310303, + "loss_ce": 0.005634660832583904, + "loss_iou": 0.2451171875, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 719761604, + "step": 7423 + }, + { + "epoch": 0.7258506061791161, + "grad_norm": 21.4486049958422, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 719858720, + "step": 7424 + }, + { + "epoch": 0.7258506061791161, + "loss": 0.05778183788061142, + "loss_ce": 0.0030867066234350204, + "loss_iou": 0.26171875, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 719858720, + "step": 7424 + }, + { + "epoch": 0.7259483770043019, + "grad_norm": 28.93935289253711, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 719955948, + "step": 7425 + }, + { + "epoch": 0.7259483770043019, + "loss": 0.05799907073378563, + "loss_ce": 0.004257615655660629, + "loss_iou": 0.298828125, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 719955948, + "step": 7425 + }, + { + "epoch": 0.7260461478294877, + "grad_norm": 7.927887286105068, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 720053408, + "step": 7426 + }, + { + "epoch": 0.7260461478294877, + "loss": 0.08701281249523163, + "loss_ce": 0.0060649351216852665, + "loss_iou": 0.314453125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 720053408, + "step": 7426 + }, + { + "epoch": 0.7261439186546734, + "grad_norm": 5.394571340127799, + "learning_rate": 5e-05, + "loss": 0.0742, + "num_input_tokens_seen": 720149940, + "step": 7427 + }, + { + "epoch": 0.7261439186546734, + "loss": 0.061304353177547455, + "loss_ce": 0.003977084066718817, + "loss_iou": 0.3359375, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 720149940, + "step": 7427 + }, + { + "epoch": 0.7262416894798592, + "grad_norm": 6.527921847163009, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 720246840, + "step": 7428 + }, + { + "epoch": 0.7262416894798592, + "loss": 0.09121919423341751, + "loss_ce": 0.0070440866984426975, + "loss_iou": 0.296875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 720246840, + "step": 7428 + }, + { + "epoch": 0.7263394603050449, + "grad_norm": 11.069250038140321, + "learning_rate": 5e-05, + "loss": 0.0597, + "num_input_tokens_seen": 720343736, + "step": 7429 + }, + { + "epoch": 0.7263394603050449, + "loss": 0.04309326410293579, + "loss_ce": 0.002764287171885371, + "loss_iou": 0.3046875, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 720343736, + "step": 7429 + }, + { + "epoch": 0.7264372311302307, + "grad_norm": 7.132839296248947, + "learning_rate": 5e-05, + "loss": 0.0896, + "num_input_tokens_seen": 720440896, + "step": 7430 + }, + { + "epoch": 0.7264372311302307, + "loss": 0.08373124897480011, + "loss_ce": 0.007910333573818207, + "loss_iou": 0.3359375, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 720440896, + "step": 7430 + }, + { + "epoch": 0.7265350019554165, + "grad_norm": 3.3984378464252956, + "learning_rate": 5e-05, + "loss": 0.1065, + "num_input_tokens_seen": 720537124, + "step": 7431 + }, + { + "epoch": 0.7265350019554165, + "loss": 0.10269264876842499, + "loss_ce": 0.0027933549135923386, + "loss_iou": 0.271484375, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 720537124, + "step": 7431 + }, + { + "epoch": 0.7266327727806022, + "grad_norm": 23.64279912614125, + "learning_rate": 5e-05, + "loss": 0.0612, + "num_input_tokens_seen": 720635148, + "step": 7432 + }, + { + "epoch": 0.7266327727806022, + "loss": 0.06784676015377045, + "loss_ce": 0.002935867290943861, + "loss_iou": 0.390625, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 720635148, + "step": 7432 + }, + { + "epoch": 0.726730543605788, + "grad_norm": 18.191870579573077, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 720730720, + "step": 7433 + }, + { + "epoch": 0.726730543605788, + "loss": 0.09166842699050903, + "loss_ce": 0.007462797686457634, + "loss_iou": 0.2109375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 720730720, + "step": 7433 + }, + { + "epoch": 0.7268283144309738, + "grad_norm": 9.56346365953325, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 720827112, + "step": 7434 + }, + { + "epoch": 0.7268283144309738, + "loss": 0.09042598307132721, + "loss_ce": 0.0061974674463272095, + "loss_iou": 0.3203125, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 720827112, + "step": 7434 + }, + { + "epoch": 0.7269260852561595, + "grad_norm": 17.12252347950196, + "learning_rate": 5e-05, + "loss": 0.075, + "num_input_tokens_seen": 720923536, + "step": 7435 + }, + { + "epoch": 0.7269260852561595, + "loss": 0.08476525545120239, + "loss_ce": 0.0026271918322890997, + "loss_iou": 0.2197265625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 720923536, + "step": 7435 + }, + { + "epoch": 0.7270238560813453, + "grad_norm": 29.54799128947193, + "learning_rate": 5e-05, + "loss": 0.0838, + "num_input_tokens_seen": 721020928, + "step": 7436 + }, + { + "epoch": 0.7270238560813453, + "loss": 0.08829785138368607, + "loss_ce": 0.008662230335175991, + "loss_iou": 0.31640625, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 721020928, + "step": 7436 + }, + { + "epoch": 0.727121626906531, + "grad_norm": 9.52887794482034, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 721117988, + "step": 7437 + }, + { + "epoch": 0.727121626906531, + "loss": 0.0795033797621727, + "loss_ce": 0.004063928499817848, + "loss_iou": 0.318359375, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 721117988, + "step": 7437 + }, + { + "epoch": 0.7272193977317168, + "grad_norm": 15.990247321175929, + "learning_rate": 5e-05, + "loss": 0.0966, + "num_input_tokens_seen": 721214784, + "step": 7438 + }, + { + "epoch": 0.7272193977317168, + "loss": 0.11014033854007721, + "loss_ce": 0.0036339852958917618, + "loss_iou": 0.255859375, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 721214784, + "step": 7438 + }, + { + "epoch": 0.7273171685569026, + "grad_norm": 17.994452572447354, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 721312672, + "step": 7439 + }, + { + "epoch": 0.7273171685569026, + "loss": 0.10813131928443909, + "loss_ce": 0.009086515754461288, + "loss_iou": 0.294921875, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 721312672, + "step": 7439 + }, + { + "epoch": 0.7274149393820883, + "grad_norm": 9.017223481660606, + "learning_rate": 5e-05, + "loss": 0.0607, + "num_input_tokens_seen": 721408504, + "step": 7440 + }, + { + "epoch": 0.7274149393820883, + "loss": 0.07406740635633469, + "loss_ce": 0.009660059586167336, + "loss_iou": 0.283203125, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 721408504, + "step": 7440 + }, + { + "epoch": 0.7275127102072741, + "grad_norm": 9.123481521473924, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 721504556, + "step": 7441 + }, + { + "epoch": 0.7275127102072741, + "loss": 0.043619319796562195, + "loss_ce": 0.004572076257318258, + "loss_iou": 0.2041015625, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 721504556, + "step": 7441 + }, + { + "epoch": 0.72761048103246, + "grad_norm": 7.261906890991165, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 721601984, + "step": 7442 + }, + { + "epoch": 0.72761048103246, + "loss": 0.06773616373538971, + "loss_ce": 0.006533159874379635, + "loss_iou": 0.27734375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 721601984, + "step": 7442 + }, + { + "epoch": 0.7277082518576456, + "grad_norm": 16.359509056589275, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 721698992, + "step": 7443 + }, + { + "epoch": 0.7277082518576456, + "loss": 0.07348820567131042, + "loss_ce": 0.005456897430121899, + "loss_iou": 0.33984375, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 721698992, + "step": 7443 + }, + { + "epoch": 0.7278060226828315, + "grad_norm": 15.440313065178128, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 721796372, + "step": 7444 + }, + { + "epoch": 0.7278060226828315, + "loss": 0.10636942088603973, + "loss_ce": 0.005539339035749435, + "loss_iou": 0.3515625, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 721796372, + "step": 7444 + }, + { + "epoch": 0.7279037935080173, + "grad_norm": 18.495405643841302, + "learning_rate": 5e-05, + "loss": 0.0589, + "num_input_tokens_seen": 721892968, + "step": 7445 + }, + { + "epoch": 0.7279037935080173, + "loss": 0.06823691725730896, + "loss_ce": 0.01267203688621521, + "loss_iou": 0.34765625, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 721892968, + "step": 7445 + }, + { + "epoch": 0.728001564333203, + "grad_norm": 7.8715047339963515, + "learning_rate": 5e-05, + "loss": 0.0617, + "num_input_tokens_seen": 721989020, + "step": 7446 + }, + { + "epoch": 0.728001564333203, + "loss": 0.06370152533054352, + "loss_ce": 0.006109131965786219, + "loss_iou": 0.2255859375, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 721989020, + "step": 7446 + }, + { + "epoch": 0.7280993351583888, + "grad_norm": 8.556724783088303, + "learning_rate": 5e-05, + "loss": 0.064, + "num_input_tokens_seen": 722086204, + "step": 7447 + }, + { + "epoch": 0.7280993351583888, + "loss": 0.08196483552455902, + "loss_ce": 0.006983148865401745, + "loss_iou": 0.28125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 722086204, + "step": 7447 + }, + { + "epoch": 0.7281971059835745, + "grad_norm": 6.1681993262931725, + "learning_rate": 5e-05, + "loss": 0.0997, + "num_input_tokens_seen": 722184256, + "step": 7448 + }, + { + "epoch": 0.7281971059835745, + "loss": 0.1545214056968689, + "loss_ce": 0.008853381499648094, + "loss_iou": 0.28515625, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 722184256, + "step": 7448 + }, + { + "epoch": 0.7282948768087603, + "grad_norm": 6.5745903998862705, + "learning_rate": 5e-05, + "loss": 0.0489, + "num_input_tokens_seen": 722281532, + "step": 7449 + }, + { + "epoch": 0.7282948768087603, + "loss": 0.05024333298206329, + "loss_ce": 0.0049399882555007935, + "loss_iou": 0.283203125, + "loss_num": 0.009033203125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 722281532, + "step": 7449 + }, + { + "epoch": 0.7283926476339461, + "grad_norm": 4.391234481310486, + "learning_rate": 5e-05, + "loss": 0.0615, + "num_input_tokens_seen": 722378772, + "step": 7450 + }, + { + "epoch": 0.7283926476339461, + "loss": 0.07235132157802582, + "loss_ce": 0.004831180442124605, + "loss_iou": 0.2578125, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 722378772, + "step": 7450 + }, + { + "epoch": 0.7284904184591318, + "grad_norm": 8.994223131716845, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 722475196, + "step": 7451 + }, + { + "epoch": 0.7284904184591318, + "loss": 0.08554503321647644, + "loss_ce": 0.0045284852385520935, + "loss_iou": 0.2490234375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 722475196, + "step": 7451 + }, + { + "epoch": 0.7285881892843176, + "grad_norm": 20.546394794990196, + "learning_rate": 5e-05, + "loss": 0.0714, + "num_input_tokens_seen": 722572080, + "step": 7452 + }, + { + "epoch": 0.7285881892843176, + "loss": 0.0534784309566021, + "loss_ce": 0.007076452486217022, + "loss_iou": 0.30859375, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 722572080, + "step": 7452 + }, + { + "epoch": 0.7286859601095034, + "grad_norm": 29.0810450301209, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 722669128, + "step": 7453 + }, + { + "epoch": 0.7286859601095034, + "loss": 0.055054835975170135, + "loss_ce": 0.003571681212633848, + "loss_iou": 0.216796875, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 722669128, + "step": 7453 + }, + { + "epoch": 0.7287837309346891, + "grad_norm": 10.504154176608392, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 722766788, + "step": 7454 + }, + { + "epoch": 0.7287837309346891, + "loss": 0.0951496809720993, + "loss_ce": 0.008754422888159752, + "loss_iou": 0.357421875, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 722766788, + "step": 7454 + }, + { + "epoch": 0.7288815017598749, + "grad_norm": 4.89843844585472, + "learning_rate": 5e-05, + "loss": 0.052, + "num_input_tokens_seen": 722864776, + "step": 7455 + }, + { + "epoch": 0.7288815017598749, + "loss": 0.06636073440313339, + "loss_ce": 0.0038378494791686535, + "loss_iou": 0.2353515625, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 722864776, + "step": 7455 + }, + { + "epoch": 0.7289792725850606, + "grad_norm": 3.0836870647906376, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 722961484, + "step": 7456 + }, + { + "epoch": 0.7289792725850606, + "loss": 0.10193474590778351, + "loss_ce": 0.008505183272063732, + "loss_iou": 0.42578125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 722961484, + "step": 7456 + }, + { + "epoch": 0.7290770434102464, + "grad_norm": 11.26796795578164, + "learning_rate": 5e-05, + "loss": 0.1102, + "num_input_tokens_seen": 723058440, + "step": 7457 + }, + { + "epoch": 0.7290770434102464, + "loss": 0.10046692192554474, + "loss_ce": 0.0047790552489459515, + "loss_iou": 0.3203125, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 723058440, + "step": 7457 + }, + { + "epoch": 0.7291748142354322, + "grad_norm": 3.2628927050589547, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 723154796, + "step": 7458 + }, + { + "epoch": 0.7291748142354322, + "loss": 0.07608751952648163, + "loss_ce": 0.003989744000136852, + "loss_iou": 0.1962890625, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 723154796, + "step": 7458 + }, + { + "epoch": 0.7292725850606179, + "grad_norm": 4.892622503450447, + "learning_rate": 5e-05, + "loss": 0.0532, + "num_input_tokens_seen": 723252084, + "step": 7459 + }, + { + "epoch": 0.7292725850606179, + "loss": 0.036702923476696014, + "loss_ce": 0.002736857393756509, + "loss_iou": 0.251953125, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 723252084, + "step": 7459 + }, + { + "epoch": 0.7293703558858037, + "grad_norm": 2.896865513579897, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 723348796, + "step": 7460 + }, + { + "epoch": 0.7293703558858037, + "loss": 0.05629480257630348, + "loss_ce": 0.005605105310678482, + "loss_iou": 0.1806640625, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 723348796, + "step": 7460 + }, + { + "epoch": 0.7294681267109895, + "grad_norm": 5.297297325949509, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 723445944, + "step": 7461 + }, + { + "epoch": 0.7294681267109895, + "loss": 0.07492640614509583, + "loss_ce": 0.0018825847655534744, + "loss_iou": 0.298828125, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 723445944, + "step": 7461 + }, + { + "epoch": 0.7295658975361752, + "grad_norm": 4.3815843963192425, + "learning_rate": 5e-05, + "loss": 0.0819, + "num_input_tokens_seen": 723542976, + "step": 7462 + }, + { + "epoch": 0.7295658975361752, + "loss": 0.09112271666526794, + "loss_ce": 0.006772125605493784, + "loss_iou": 0.34765625, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 723542976, + "step": 7462 + }, + { + "epoch": 0.729663668361361, + "grad_norm": 4.291491414966957, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 723640232, + "step": 7463 + }, + { + "epoch": 0.729663668361361, + "loss": 0.0814126580953598, + "loss_ce": 0.005019534844905138, + "loss_iou": 0.318359375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 723640232, + "step": 7463 + }, + { + "epoch": 0.7297614391865468, + "grad_norm": 6.432609896393545, + "learning_rate": 5e-05, + "loss": 0.0484, + "num_input_tokens_seen": 723738004, + "step": 7464 + }, + { + "epoch": 0.7297614391865468, + "loss": 0.05786735191941261, + "loss_ce": 0.005087202414870262, + "loss_iou": 0.318359375, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 723738004, + "step": 7464 + }, + { + "epoch": 0.7298592100117325, + "grad_norm": 3.9163433080660894, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 723834940, + "step": 7465 + }, + { + "epoch": 0.7298592100117325, + "loss": 0.06618525832891464, + "loss_ce": 0.006279250141233206, + "loss_iou": 0.265625, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 723834940, + "step": 7465 + }, + { + "epoch": 0.7299569808369183, + "grad_norm": 12.185057474917322, + "learning_rate": 5e-05, + "loss": 0.0861, + "num_input_tokens_seen": 723932068, + "step": 7466 + }, + { + "epoch": 0.7299569808369183, + "loss": 0.09298722445964813, + "loss_ce": 0.004757089540362358, + "loss_iou": 0.2734375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 723932068, + "step": 7466 + }, + { + "epoch": 0.730054751662104, + "grad_norm": 8.612319913634343, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 724029272, + "step": 7467 + }, + { + "epoch": 0.730054751662104, + "loss": 0.07444530725479126, + "loss_ce": 0.0072608585469424725, + "loss_iou": 0.283203125, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 724029272, + "step": 7467 + }, + { + "epoch": 0.7301525224872898, + "grad_norm": 9.000208624795837, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 724127140, + "step": 7468 + }, + { + "epoch": 0.7301525224872898, + "loss": 0.0503530278801918, + "loss_ce": 0.005164122674614191, + "loss_iou": 0.314453125, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 724127140, + "step": 7468 + }, + { + "epoch": 0.7302502933124756, + "grad_norm": 16.58838835989137, + "learning_rate": 5e-05, + "loss": 0.0632, + "num_input_tokens_seen": 724224304, + "step": 7469 + }, + { + "epoch": 0.7302502933124756, + "loss": 0.08044980466365814, + "loss_ce": 0.007039773277938366, + "loss_iou": 0.37109375, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 724224304, + "step": 7469 + }, + { + "epoch": 0.7303480641376613, + "grad_norm": 9.76728201115522, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 724321464, + "step": 7470 + }, + { + "epoch": 0.7303480641376613, + "loss": 0.10463712364435196, + "loss_ce": 0.008171053603291512, + "loss_iou": 0.1962890625, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 724321464, + "step": 7470 + }, + { + "epoch": 0.7304458349628471, + "grad_norm": 12.154196256719622, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 724418904, + "step": 7471 + }, + { + "epoch": 0.7304458349628471, + "loss": 0.06288006156682968, + "loss_ce": 0.005102655850350857, + "loss_iou": 0.306640625, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 724418904, + "step": 7471 + }, + { + "epoch": 0.7305436057880329, + "grad_norm": 4.1510976964281, + "learning_rate": 5e-05, + "loss": 0.0638, + "num_input_tokens_seen": 724515808, + "step": 7472 + }, + { + "epoch": 0.7305436057880329, + "loss": 0.07936196029186249, + "loss_ce": 0.0040369494818151, + "loss_iou": 0.25, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 724515808, + "step": 7472 + }, + { + "epoch": 0.7306413766132186, + "grad_norm": 5.869540540424718, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 724612756, + "step": 7473 + }, + { + "epoch": 0.7306413766132186, + "loss": 0.0668894350528717, + "loss_ce": 0.012400300242006779, + "loss_iou": 0.369140625, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 724612756, + "step": 7473 + }, + { + "epoch": 0.7307391474384044, + "grad_norm": 17.202568538273805, + "learning_rate": 5e-05, + "loss": 0.114, + "num_input_tokens_seen": 724710004, + "step": 7474 + }, + { + "epoch": 0.7307391474384044, + "loss": 0.10475560277700424, + "loss_ce": 0.008472641929984093, + "loss_iou": 0.2890625, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 724710004, + "step": 7474 + }, + { + "epoch": 0.7308369182635901, + "grad_norm": 16.010745421336207, + "learning_rate": 5e-05, + "loss": 0.0916, + "num_input_tokens_seen": 724807184, + "step": 7475 + }, + { + "epoch": 0.7308369182635901, + "loss": 0.0900164470076561, + "loss_ce": 0.0040179104544222355, + "loss_iou": 0.263671875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 724807184, + "step": 7475 + }, + { + "epoch": 0.7309346890887759, + "grad_norm": 4.161637591629185, + "learning_rate": 5e-05, + "loss": 0.0585, + "num_input_tokens_seen": 724904180, + "step": 7476 + }, + { + "epoch": 0.7309346890887759, + "loss": 0.05757513269782066, + "loss_ce": 0.008228207938373089, + "loss_iou": 0.23046875, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 724904180, + "step": 7476 + }, + { + "epoch": 0.7310324599139617, + "grad_norm": 3.9954387073040247, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 725001008, + "step": 7477 + }, + { + "epoch": 0.7310324599139617, + "loss": 0.04621502757072449, + "loss_ce": 0.007404293864965439, + "loss_iou": 0.302734375, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 725001008, + "step": 7477 + }, + { + "epoch": 0.7311302307391474, + "grad_norm": 1.9924241445779094, + "learning_rate": 5e-05, + "loss": 0.0467, + "num_input_tokens_seen": 725098708, + "step": 7478 + }, + { + "epoch": 0.7311302307391474, + "loss": 0.024181727319955826, + "loss_ce": 0.001217250945046544, + "loss_iou": 0.205078125, + "loss_num": 0.00457763671875, + "loss_xval": 0.02294921875, + "num_input_tokens_seen": 725098708, + "step": 7478 + }, + { + "epoch": 0.7312280015643332, + "grad_norm": 6.126779692081477, + "learning_rate": 5e-05, + "loss": 0.0408, + "num_input_tokens_seen": 725195488, + "step": 7479 + }, + { + "epoch": 0.7312280015643332, + "loss": 0.043455224484205246, + "loss_ce": 0.0022412354592233896, + "loss_iou": 0.341796875, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 725195488, + "step": 7479 + }, + { + "epoch": 0.731325772389519, + "grad_norm": 3.792803583580258, + "learning_rate": 5e-05, + "loss": 0.052, + "num_input_tokens_seen": 725291468, + "step": 7480 + }, + { + "epoch": 0.731325772389519, + "loss": 0.059484388679265976, + "loss_ce": 0.004959012847393751, + "loss_iou": 0.283203125, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 725291468, + "step": 7480 + }, + { + "epoch": 0.7314235432147047, + "grad_norm": 32.268531196954896, + "learning_rate": 5e-05, + "loss": 0.0651, + "num_input_tokens_seen": 725386792, + "step": 7481 + }, + { + "epoch": 0.7314235432147047, + "loss": 0.04552429914474487, + "loss_ce": 0.002906501991674304, + "loss_iou": 0.1669921875, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 725386792, + "step": 7481 + }, + { + "epoch": 0.7315213140398905, + "grad_norm": 49.412435877284196, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 725484320, + "step": 7482 + }, + { + "epoch": 0.7315213140398905, + "loss": 0.07768881320953369, + "loss_ce": 0.004400853533297777, + "loss_iou": 0.34765625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 725484320, + "step": 7482 + }, + { + "epoch": 0.7316190848650762, + "grad_norm": 9.320575744426339, + "learning_rate": 5e-05, + "loss": 0.0465, + "num_input_tokens_seen": 725580884, + "step": 7483 + }, + { + "epoch": 0.7316190848650762, + "loss": 0.04693256691098213, + "loss_ce": 0.002560007618740201, + "loss_iou": 0.1904296875, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 725580884, + "step": 7483 + }, + { + "epoch": 0.731716855690262, + "grad_norm": 9.431265523530948, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 725678332, + "step": 7484 + }, + { + "epoch": 0.731716855690262, + "loss": 0.09164391458034515, + "loss_ce": 0.00367699284106493, + "loss_iou": 0.2890625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 725678332, + "step": 7484 + }, + { + "epoch": 0.7318146265154478, + "grad_norm": 10.969481121197207, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 725775792, + "step": 7485 + }, + { + "epoch": 0.7318146265154478, + "loss": 0.0446658730506897, + "loss_ce": 0.002231183461844921, + "loss_iou": 0.2890625, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 725775792, + "step": 7485 + }, + { + "epoch": 0.7319123973406335, + "grad_norm": 6.283514227736547, + "learning_rate": 5e-05, + "loss": 0.0963, + "num_input_tokens_seen": 725873680, + "step": 7486 + }, + { + "epoch": 0.7319123973406335, + "loss": 0.09887215495109558, + "loss_ce": 0.01024910993874073, + "loss_iou": 0.259765625, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 725873680, + "step": 7486 + }, + { + "epoch": 0.7320101681658193, + "grad_norm": 2.77949342194466, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 725970312, + "step": 7487 + }, + { + "epoch": 0.7320101681658193, + "loss": 0.0662035346031189, + "loss_ce": 0.005282818339765072, + "loss_iou": 0.263671875, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 725970312, + "step": 7487 + }, + { + "epoch": 0.7321079389910051, + "grad_norm": 5.846762774123728, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 726066856, + "step": 7488 + }, + { + "epoch": 0.7321079389910051, + "loss": 0.09402279555797577, + "loss_ce": 0.008077667094767094, + "loss_iou": 0.19921875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 726066856, + "step": 7488 + }, + { + "epoch": 0.7322057098161908, + "grad_norm": 3.684423048863519, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 726164104, + "step": 7489 + }, + { + "epoch": 0.7322057098161908, + "loss": 0.11864741146564484, + "loss_ce": 0.006205393932759762, + "loss_iou": 0.408203125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 726164104, + "step": 7489 + }, + { + "epoch": 0.7323034806413766, + "grad_norm": 4.78806014085058, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 726260716, + "step": 7490 + }, + { + "epoch": 0.7323034806413766, + "loss": 0.06472029536962509, + "loss_ce": 0.005973956547677517, + "loss_iou": 0.1904296875, + "loss_num": 0.01171875, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 726260716, + "step": 7490 + }, + { + "epoch": 0.7324012514665624, + "grad_norm": 9.308535423749536, + "learning_rate": 5e-05, + "loss": 0.0533, + "num_input_tokens_seen": 726357604, + "step": 7491 + }, + { + "epoch": 0.7324012514665624, + "loss": 0.034260399639606476, + "loss_ce": 0.002869255607947707, + "loss_iou": 0.189453125, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 726357604, + "step": 7491 + }, + { + "epoch": 0.7324990222917481, + "grad_norm": 1.8624736947833247, + "learning_rate": 5e-05, + "loss": 0.0697, + "num_input_tokens_seen": 726453720, + "step": 7492 + }, + { + "epoch": 0.7324990222917481, + "loss": 0.07195077836513519, + "loss_ce": 0.007852421142160892, + "loss_iou": 0.21875, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 726453720, + "step": 7492 + }, + { + "epoch": 0.7325967931169339, + "grad_norm": 18.78309143247775, + "learning_rate": 5e-05, + "loss": 0.1008, + "num_input_tokens_seen": 726550508, + "step": 7493 + }, + { + "epoch": 0.7325967931169339, + "loss": 0.10890714079141617, + "loss_ce": 0.003652013372629881, + "loss_iou": 0.296875, + "loss_num": 0.02099609375, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 726550508, + "step": 7493 + }, + { + "epoch": 0.7326945639421196, + "grad_norm": 7.492220089293271, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 726648136, + "step": 7494 + }, + { + "epoch": 0.7326945639421196, + "loss": 0.0818386822938919, + "loss_ce": 0.011358333751559258, + "loss_iou": 0.197265625, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 726648136, + "step": 7494 + }, + { + "epoch": 0.7327923347673054, + "grad_norm": 18.326900034422415, + "learning_rate": 5e-05, + "loss": 0.067, + "num_input_tokens_seen": 726744364, + "step": 7495 + }, + { + "epoch": 0.7327923347673054, + "loss": 0.09943418204784393, + "loss_ce": 0.007835670374333858, + "loss_iou": 0.16015625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 726744364, + "step": 7495 + }, + { + "epoch": 0.7328901055924912, + "grad_norm": 4.809120557202586, + "learning_rate": 5e-05, + "loss": 0.0688, + "num_input_tokens_seen": 726841532, + "step": 7496 + }, + { + "epoch": 0.7328901055924912, + "loss": 0.07893645018339157, + "loss_ce": 0.008379808627068996, + "loss_iou": 0.298828125, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 726841532, + "step": 7496 + }, + { + "epoch": 0.7329878764176769, + "grad_norm": 2.6255361402946567, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 726938032, + "step": 7497 + }, + { + "epoch": 0.7329878764176769, + "loss": 0.05693378672003746, + "loss_ce": 0.00797596201300621, + "loss_iou": 0.197265625, + "loss_num": 0.009765625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 726938032, + "step": 7497 + }, + { + "epoch": 0.7330856472428627, + "grad_norm": 2.8021596948592427, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 727035072, + "step": 7498 + }, + { + "epoch": 0.7330856472428627, + "loss": 0.040579408407211304, + "loss_ce": 0.0034242593683302402, + "loss_iou": 0.271484375, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 727035072, + "step": 7498 + }, + { + "epoch": 0.7331834180680485, + "grad_norm": 4.1276673369022685, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 727131072, + "step": 7499 + }, + { + "epoch": 0.7331834180680485, + "loss": 0.04310101270675659, + "loss_ce": 0.003687559859827161, + "loss_iou": 0.283203125, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 727131072, + "step": 7499 + }, + { + "epoch": 0.7332811888932342, + "grad_norm": 13.202980047620857, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 727228660, + "step": 7500 + }, + { + "epoch": 0.7332811888932342, + "eval_seeclick_CIoU": 0.3887053355574608, + "eval_seeclick_GIoU": 0.3924810588359833, + "eval_seeclick_IoU": 0.4723699688911438, + "eval_seeclick_MAE_all": 0.12198929116129875, + "eval_seeclick_MAE_h": 0.04579135961830616, + "eval_seeclick_MAE_w": 0.18825270980596542, + "eval_seeclick_MAE_x": 0.20857451111078262, + "eval_seeclick_MAE_y": 0.045338572934269905, + "eval_seeclick_NUM_probability": 0.9999833106994629, + "eval_seeclick_inside_bbox": 0.6931818127632141, + "eval_seeclick_loss": 0.34965869784355164, + "eval_seeclick_loss_ce": 0.027088407427072525, + "eval_seeclick_loss_iou": 0.3841552734375, + "eval_seeclick_loss_num": 0.06153106689453125, + "eval_seeclick_loss_xval": 0.307830810546875, + "eval_seeclick_runtime": 76.7471, + "eval_seeclick_samples_per_second": 0.56, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 727228660, + "step": 7500 + }, + { + "epoch": 0.7332811888932342, + "eval_icons_CIoU": 0.6829911768436432, + "eval_icons_GIoU": 0.6793608665466309, + "eval_icons_IoU": 0.7079089879989624, + "eval_icons_MAE_all": 0.05493761785328388, + "eval_icons_MAE_h": 0.06022905744612217, + "eval_icons_MAE_w": 0.05152469128370285, + "eval_icons_MAE_x": 0.0520807933062315, + "eval_icons_MAE_y": 0.055915918201208115, + "eval_icons_NUM_probability": 0.9999972283840179, + "eval_icons_inside_bbox": 0.7916666567325592, + "eval_icons_loss": 0.17849449813365936, + "eval_icons_loss_ce": 1.53044068156305e-05, + "eval_icons_loss_iou": 0.4483642578125, + "eval_icons_loss_num": 0.039768218994140625, + "eval_icons_loss_xval": 0.1988983154296875, + "eval_icons_runtime": 86.9189, + "eval_icons_samples_per_second": 0.575, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 727228660, + "step": 7500 + }, + { + "epoch": 0.7332811888932342, + "eval_screenspot_CIoU": 0.3264344682296117, + "eval_screenspot_GIoU": 0.30656326313813526, + "eval_screenspot_IoU": 0.4203055401643117, + "eval_screenspot_MAE_all": 0.16091242929299673, + "eval_screenspot_MAE_h": 0.12696204831202826, + "eval_screenspot_MAE_w": 0.20831905553738275, + "eval_screenspot_MAE_x": 0.18529821932315826, + "eval_screenspot_MAE_y": 0.12307040890057881, + "eval_screenspot_NUM_probability": 0.9999937216440836, + "eval_screenspot_inside_bbox": 0.6775000095367432, + "eval_screenspot_loss": 0.5740771889686584, + "eval_screenspot_loss_ce": 0.024227401241660118, + "eval_screenspot_loss_iou": 0.3866373697916667, + "eval_screenspot_loss_num": 0.11115519205729167, + "eval_screenspot_loss_xval": 0.5557861328125, + "eval_screenspot_runtime": 148.0004, + "eval_screenspot_samples_per_second": 0.601, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 727228660, + "step": 7500 + }, + { + "epoch": 0.7332811888932342, + "eval_compot_CIoU": 0.3947072774171829, + "eval_compot_GIoU": 0.38015736639499664, + "eval_compot_IoU": 0.4790147393941879, + "eval_compot_MAE_all": 0.10505901277065277, + "eval_compot_MAE_h": 0.08897611871361732, + "eval_compot_MAE_w": 0.11833581328392029, + "eval_compot_MAE_x": 0.11988677829504013, + "eval_compot_MAE_y": 0.09303732216358185, + "eval_compot_NUM_probability": 0.9999651312828064, + "eval_compot_inside_bbox": 0.6770833432674408, + "eval_compot_loss": 0.3462944030761719, + "eval_compot_loss_ce": 0.024396827444434166, + "eval_compot_loss_iou": 0.38922119140625, + "eval_compot_loss_num": 0.058414459228515625, + "eval_compot_loss_xval": 0.29193115234375, + "eval_compot_runtime": 87.3816, + "eval_compot_samples_per_second": 0.572, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 727228660, + "step": 7500 + }, + { + "epoch": 0.7332811888932342, + "loss": 0.27529799938201904, + "loss_ce": 0.02358902245759964, + "loss_iou": 0.390625, + "loss_num": 0.05029296875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 727228660, + "step": 7500 + }, + { + "epoch": 0.73337895971842, + "grad_norm": 12.115374398880471, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 727325516, + "step": 7501 + }, + { + "epoch": 0.73337895971842, + "loss": 0.10547736287117004, + "loss_ce": 0.008034739643335342, + "loss_iou": 0.31640625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 727325516, + "step": 7501 + }, + { + "epoch": 0.7334767305436057, + "grad_norm": 10.481266218974705, + "learning_rate": 5e-05, + "loss": 0.0877, + "num_input_tokens_seen": 727423488, + "step": 7502 + }, + { + "epoch": 0.7334767305436057, + "loss": 0.0897112637758255, + "loss_ce": 0.007435878273099661, + "loss_iou": 0.36328125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 727423488, + "step": 7502 + }, + { + "epoch": 0.7335745013687915, + "grad_norm": 7.211131600367706, + "learning_rate": 5e-05, + "loss": 0.0867, + "num_input_tokens_seen": 727521016, + "step": 7503 + }, + { + "epoch": 0.7335745013687915, + "loss": 0.09746348857879639, + "loss_ce": 0.006399035919457674, + "loss_iou": 0.33984375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 727521016, + "step": 7503 + }, + { + "epoch": 0.7336722721939773, + "grad_norm": 2.329262214358069, + "learning_rate": 5e-05, + "loss": 0.0742, + "num_input_tokens_seen": 727617032, + "step": 7504 + }, + { + "epoch": 0.7336722721939773, + "loss": 0.10076090693473816, + "loss_ce": 0.006354779936373234, + "loss_iou": 0.22265625, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 727617032, + "step": 7504 + }, + { + "epoch": 0.733770043019163, + "grad_norm": 4.48491619963114, + "learning_rate": 5e-05, + "loss": 0.0554, + "num_input_tokens_seen": 727714480, + "step": 7505 + }, + { + "epoch": 0.733770043019163, + "loss": 0.03954894095659256, + "loss_ce": 0.0020733559504151344, + "loss_iou": 0.23828125, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 727714480, + "step": 7505 + }, + { + "epoch": 0.7338678138443488, + "grad_norm": 3.3565673465578176, + "learning_rate": 5e-05, + "loss": 0.0932, + "num_input_tokens_seen": 727810556, + "step": 7506 + }, + { + "epoch": 0.7338678138443488, + "loss": 0.09382621198892593, + "loss_ce": 0.008239662274718285, + "loss_iou": 0.1748046875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 727810556, + "step": 7506 + }, + { + "epoch": 0.7339655846695347, + "grad_norm": 4.277548315631708, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 727907096, + "step": 7507 + }, + { + "epoch": 0.7339655846695347, + "loss": 0.09153085947036743, + "loss_ce": 0.008996068499982357, + "loss_iou": 0.28515625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 727907096, + "step": 7507 + }, + { + "epoch": 0.7340633554947203, + "grad_norm": 7.354681185078744, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 728003576, + "step": 7508 + }, + { + "epoch": 0.7340633554947203, + "loss": 0.1059439480304718, + "loss_ce": 0.005388522055000067, + "loss_iou": 0.23046875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 728003576, + "step": 7508 + }, + { + "epoch": 0.7341611263199062, + "grad_norm": 11.203810783689244, + "learning_rate": 5e-05, + "loss": 0.055, + "num_input_tokens_seen": 728100308, + "step": 7509 + }, + { + "epoch": 0.7341611263199062, + "loss": 0.056105274707078934, + "loss_ce": 0.002005236456170678, + "loss_iou": 0.2412109375, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 728100308, + "step": 7509 + }, + { + "epoch": 0.734258897145092, + "grad_norm": 14.017904902268533, + "learning_rate": 5e-05, + "loss": 0.1073, + "num_input_tokens_seen": 728196804, + "step": 7510 + }, + { + "epoch": 0.734258897145092, + "loss": 0.12599097192287445, + "loss_ce": 0.005248174071311951, + "loss_iou": 0.30859375, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 728196804, + "step": 7510 + }, + { + "epoch": 0.7343566679702777, + "grad_norm": 13.26820050637294, + "learning_rate": 5e-05, + "loss": 0.0589, + "num_input_tokens_seen": 728294136, + "step": 7511 + }, + { + "epoch": 0.7343566679702777, + "loss": 0.0536457858979702, + "loss_ce": 0.0028187595307826996, + "loss_iou": 0.27734375, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 728294136, + "step": 7511 + }, + { + "epoch": 0.7344544387954635, + "grad_norm": 11.513413696187682, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 728391156, + "step": 7512 + }, + { + "epoch": 0.7344544387954635, + "loss": 0.07901022583246231, + "loss_ce": 0.006424169056117535, + "loss_iou": 0.34375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 728391156, + "step": 7512 + }, + { + "epoch": 0.7345522096206492, + "grad_norm": 5.321792925423004, + "learning_rate": 5e-05, + "loss": 0.0899, + "num_input_tokens_seen": 728488784, + "step": 7513 + }, + { + "epoch": 0.7345522096206492, + "loss": 0.10798010975122452, + "loss_ce": 0.009141306392848492, + "loss_iou": 0.2578125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 728488784, + "step": 7513 + }, + { + "epoch": 0.734649980445835, + "grad_norm": 3.7591417835576335, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 728585684, + "step": 7514 + }, + { + "epoch": 0.734649980445835, + "loss": 0.08872276544570923, + "loss_ce": 0.004849012475460768, + "loss_iou": 0.291015625, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 728585684, + "step": 7514 + }, + { + "epoch": 0.7347477512710208, + "grad_norm": 12.585690293747387, + "learning_rate": 5e-05, + "loss": 0.1187, + "num_input_tokens_seen": 728682032, + "step": 7515 + }, + { + "epoch": 0.7347477512710208, + "loss": 0.12026185542345047, + "loss_ce": 0.00944108422845602, + "loss_iou": 0.34375, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 728682032, + "step": 7515 + }, + { + "epoch": 0.7348455220962065, + "grad_norm": 7.4084376903977, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 728779460, + "step": 7516 + }, + { + "epoch": 0.7348455220962065, + "loss": 0.09461559355258942, + "loss_ce": 0.009181632660329342, + "loss_iou": 0.3046875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 728779460, + "step": 7516 + }, + { + "epoch": 0.7349432929213923, + "grad_norm": 7.59927013863775, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 728876632, + "step": 7517 + }, + { + "epoch": 0.7349432929213923, + "loss": 0.0962241142988205, + "loss_ce": 0.0062735434621572495, + "loss_iou": 0.37890625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 728876632, + "step": 7517 + }, + { + "epoch": 0.7350410637465781, + "grad_norm": 16.58585235344798, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 728973968, + "step": 7518 + }, + { + "epoch": 0.7350410637465781, + "loss": 0.06438064575195312, + "loss_ce": 0.005329134874045849, + "loss_iou": 0.419921875, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 728973968, + "step": 7518 + }, + { + "epoch": 0.7351388345717638, + "grad_norm": 12.541442029569629, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 729070828, + "step": 7519 + }, + { + "epoch": 0.7351388345717638, + "loss": 0.08445532619953156, + "loss_ce": 0.005643676966428757, + "loss_iou": 0.369140625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 729070828, + "step": 7519 + }, + { + "epoch": 0.7352366053969496, + "grad_norm": 49.86532718986262, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 729167508, + "step": 7520 + }, + { + "epoch": 0.7352366053969496, + "loss": 0.06274005770683289, + "loss_ce": 0.003299439325928688, + "loss_iou": 0.275390625, + "loss_num": 0.01190185546875, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 729167508, + "step": 7520 + }, + { + "epoch": 0.7353343762221353, + "grad_norm": 19.49451368110527, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 729264664, + "step": 7521 + }, + { + "epoch": 0.7353343762221353, + "loss": 0.08394023776054382, + "loss_ce": 0.002347677480429411, + "loss_iou": 0.294921875, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 729264664, + "step": 7521 + }, + { + "epoch": 0.7354321470473211, + "grad_norm": 3.5990587760030364, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 729361440, + "step": 7522 + }, + { + "epoch": 0.7354321470473211, + "loss": 0.09741255640983582, + "loss_ce": 0.00589796993881464, + "loss_iou": 0.29296875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 729361440, + "step": 7522 + }, + { + "epoch": 0.7355299178725069, + "grad_norm": 3.2369384957653127, + "learning_rate": 5e-05, + "loss": 0.0619, + "num_input_tokens_seen": 729457940, + "step": 7523 + }, + { + "epoch": 0.7355299178725069, + "loss": 0.05665791034698486, + "loss_ce": 0.007082107476890087, + "loss_iou": 0.2275390625, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 729457940, + "step": 7523 + }, + { + "epoch": 0.7356276886976926, + "grad_norm": 4.130649424262465, + "learning_rate": 5e-05, + "loss": 0.0543, + "num_input_tokens_seen": 729555008, + "step": 7524 + }, + { + "epoch": 0.7356276886976926, + "loss": 0.06418441236019135, + "loss_ce": 0.0015623451909050345, + "loss_iou": 0.27734375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 729555008, + "step": 7524 + }, + { + "epoch": 0.7357254595228784, + "grad_norm": 12.798003649692976, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 729652344, + "step": 7525 + }, + { + "epoch": 0.7357254595228784, + "loss": 0.058382317423820496, + "loss_ce": 0.003160760272294283, + "loss_iou": 0.345703125, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 729652344, + "step": 7525 + }, + { + "epoch": 0.7358232303480642, + "grad_norm": 19.13495284439823, + "learning_rate": 5e-05, + "loss": 0.0827, + "num_input_tokens_seen": 729748228, + "step": 7526 + }, + { + "epoch": 0.7358232303480642, + "loss": 0.08432339876890182, + "loss_ce": 0.0036654388532042503, + "loss_iou": 0.263671875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 729748228, + "step": 7526 + }, + { + "epoch": 0.7359210011732499, + "grad_norm": 8.372750306571419, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 729845788, + "step": 7527 + }, + { + "epoch": 0.7359210011732499, + "loss": 0.043533094227313995, + "loss_ce": 0.0038564270362257957, + "loss_iou": 0.28515625, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 729845788, + "step": 7527 + }, + { + "epoch": 0.7360187719984357, + "grad_norm": 12.157122489714395, + "learning_rate": 5e-05, + "loss": 0.0934, + "num_input_tokens_seen": 729942932, + "step": 7528 + }, + { + "epoch": 0.7360187719984357, + "loss": 0.08768022060394287, + "loss_ce": 0.0035432667937129736, + "loss_iou": 0.33203125, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 729942932, + "step": 7528 + }, + { + "epoch": 0.7361165428236214, + "grad_norm": 4.8657742884278665, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 730039500, + "step": 7529 + }, + { + "epoch": 0.7361165428236214, + "loss": 0.07446279376745224, + "loss_ce": 0.007383251097053289, + "loss_iou": 0.2275390625, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 730039500, + "step": 7529 + }, + { + "epoch": 0.7362143136488072, + "grad_norm": 6.622111279620691, + "learning_rate": 5e-05, + "loss": 0.1146, + "num_input_tokens_seen": 730136584, + "step": 7530 + }, + { + "epoch": 0.7362143136488072, + "loss": 0.10340213030576706, + "loss_ce": 0.007225981447845697, + "loss_iou": 0.259765625, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 730136584, + "step": 7530 + }, + { + "epoch": 0.736312084473993, + "grad_norm": 7.029383037588519, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 730233068, + "step": 7531 + }, + { + "epoch": 0.736312084473993, + "loss": 0.043041013181209564, + "loss_ce": 0.005092402920126915, + "loss_iou": 0.345703125, + "loss_num": 0.007598876953125, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 730233068, + "step": 7531 + }, + { + "epoch": 0.7364098552991787, + "grad_norm": 14.426291061759322, + "learning_rate": 5e-05, + "loss": 0.1016, + "num_input_tokens_seen": 730329636, + "step": 7532 + }, + { + "epoch": 0.7364098552991787, + "loss": 0.10966327786445618, + "loss_ce": 0.006407049484550953, + "loss_iou": 0.2353515625, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 730329636, + "step": 7532 + }, + { + "epoch": 0.7365076261243645, + "grad_norm": 6.063429912505339, + "learning_rate": 5e-05, + "loss": 0.0591, + "num_input_tokens_seen": 730427084, + "step": 7533 + }, + { + "epoch": 0.7365076261243645, + "loss": 0.05118109658360481, + "loss_ce": 0.0038178155664354563, + "loss_iou": 0.330078125, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 730427084, + "step": 7533 + }, + { + "epoch": 0.7366053969495503, + "grad_norm": 7.21278464941549, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 730524312, + "step": 7534 + }, + { + "epoch": 0.7366053969495503, + "loss": 0.07077951729297638, + "loss_ce": 0.008401586674153805, + "loss_iou": 0.298828125, + "loss_num": 0.012451171875, + "loss_xval": 0.0625, + "num_input_tokens_seen": 730524312, + "step": 7534 + }, + { + "epoch": 0.736703167774736, + "grad_norm": 8.58809102338671, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 730620952, + "step": 7535 + }, + { + "epoch": 0.736703167774736, + "loss": 0.07005783915519714, + "loss_ce": 0.006062482949346304, + "loss_iou": 0.265625, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 730620952, + "step": 7535 + }, + { + "epoch": 0.7368009385999218, + "grad_norm": 5.292748612976121, + "learning_rate": 5e-05, + "loss": 0.1057, + "num_input_tokens_seen": 730717908, + "step": 7536 + }, + { + "epoch": 0.7368009385999218, + "loss": 0.1389893889427185, + "loss_ce": 0.0044526453129947186, + "loss_iou": 0.271484375, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 730717908, + "step": 7536 + }, + { + "epoch": 0.7368987094251076, + "grad_norm": 21.740973268529515, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 730813688, + "step": 7537 + }, + { + "epoch": 0.7368987094251076, + "loss": 0.10456520318984985, + "loss_ce": 0.005482260137796402, + "loss_iou": 0.23828125, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 730813688, + "step": 7537 + }, + { + "epoch": 0.7369964802502933, + "grad_norm": 5.510545876583029, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 730911288, + "step": 7538 + }, + { + "epoch": 0.7369964802502933, + "loss": 0.07369092106819153, + "loss_ce": 0.005432639271020889, + "loss_iou": 0.310546875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 730911288, + "step": 7538 + }, + { + "epoch": 0.7370942510754791, + "grad_norm": 49.38650809082599, + "learning_rate": 5e-05, + "loss": 0.0938, + "num_input_tokens_seen": 731008272, + "step": 7539 + }, + { + "epoch": 0.7370942510754791, + "loss": 0.09459622204303741, + "loss_ce": 0.003463102737441659, + "loss_iou": 0.279296875, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 731008272, + "step": 7539 + }, + { + "epoch": 0.7371920219006648, + "grad_norm": 83.2583243823987, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 731105560, + "step": 7540 + }, + { + "epoch": 0.7371920219006648, + "loss": 0.10311733186244965, + "loss_ce": 0.005453453399240971, + "loss_iou": 0.380859375, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 731105560, + "step": 7540 + }, + { + "epoch": 0.7372897927258506, + "grad_norm": 7.080936320335027, + "learning_rate": 5e-05, + "loss": 0.0577, + "num_input_tokens_seen": 731202692, + "step": 7541 + }, + { + "epoch": 0.7372897927258506, + "loss": 0.059028297662734985, + "loss_ce": 0.0012013053055852652, + "loss_iou": 0.28125, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 731202692, + "step": 7541 + }, + { + "epoch": 0.7373875635510364, + "grad_norm": 8.788091479055991, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 731299304, + "step": 7542 + }, + { + "epoch": 0.7373875635510364, + "loss": 0.0767577588558197, + "loss_ce": 0.0024321957025676966, + "loss_iou": 0.212890625, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 731299304, + "step": 7542 + }, + { + "epoch": 0.7374853343762221, + "grad_norm": 1.642861139964167, + "learning_rate": 5e-05, + "loss": 0.0523, + "num_input_tokens_seen": 731395356, + "step": 7543 + }, + { + "epoch": 0.7374853343762221, + "loss": 0.04308134317398071, + "loss_ce": 0.0013142186217010021, + "loss_iou": 0.349609375, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 731395356, + "step": 7543 + }, + { + "epoch": 0.7375831052014079, + "grad_norm": 7.507147908302638, + "learning_rate": 5e-05, + "loss": 0.0522, + "num_input_tokens_seen": 731492120, + "step": 7544 + }, + { + "epoch": 0.7375831052014079, + "loss": 0.04907413572072983, + "loss_ce": 0.0015163056086748838, + "loss_iou": 0.291015625, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 731492120, + "step": 7544 + }, + { + "epoch": 0.7376808760265937, + "grad_norm": 19.026847223906717, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 731589248, + "step": 7545 + }, + { + "epoch": 0.7376808760265937, + "loss": 0.08761946856975555, + "loss_ce": 0.00790755171328783, + "loss_iou": 0.314453125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 731589248, + "step": 7545 + }, + { + "epoch": 0.7377786468517794, + "grad_norm": 6.169237255451742, + "learning_rate": 5e-05, + "loss": 0.0658, + "num_input_tokens_seen": 731686968, + "step": 7546 + }, + { + "epoch": 0.7377786468517794, + "loss": 0.054477062076330185, + "loss_ce": 0.004031505901366472, + "loss_iou": 0.248046875, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 731686968, + "step": 7546 + }, + { + "epoch": 0.7378764176769652, + "grad_norm": 6.324226729455266, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 731784092, + "step": 7547 + }, + { + "epoch": 0.7378764176769652, + "loss": 0.09338057041168213, + "loss_ce": 0.005879046395421028, + "loss_iou": 0.2041015625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 731784092, + "step": 7547 + }, + { + "epoch": 0.7379741885021509, + "grad_norm": 7.631316214071917, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 731880456, + "step": 7548 + }, + { + "epoch": 0.7379741885021509, + "loss": 0.039544716477394104, + "loss_ce": 0.004426611587405205, + "loss_iou": 0.1748046875, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 731880456, + "step": 7548 + }, + { + "epoch": 0.7380719593273367, + "grad_norm": 24.77465284129047, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 731978036, + "step": 7549 + }, + { + "epoch": 0.7380719593273367, + "loss": 0.06951593607664108, + "loss_ce": 0.0032775336876511574, + "loss_iou": 0.40234375, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 731978036, + "step": 7549 + }, + { + "epoch": 0.7381697301525225, + "grad_norm": 4.796680769907745, + "learning_rate": 5e-05, + "loss": 0.0893, + "num_input_tokens_seen": 732075124, + "step": 7550 + }, + { + "epoch": 0.7381697301525225, + "loss": 0.07271949201822281, + "loss_ce": 0.0019492300925776362, + "loss_iou": 0.34375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 732075124, + "step": 7550 + }, + { + "epoch": 0.7382675009777082, + "grad_norm": 3.5875168297409687, + "learning_rate": 5e-05, + "loss": 0.0489, + "num_input_tokens_seen": 732171052, + "step": 7551 + }, + { + "epoch": 0.7382675009777082, + "loss": 0.038315001875162125, + "loss_ce": 0.004142943769693375, + "loss_iou": 0.2216796875, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 732171052, + "step": 7551 + }, + { + "epoch": 0.738365271802894, + "grad_norm": 16.998231522501207, + "learning_rate": 5e-05, + "loss": 0.0984, + "num_input_tokens_seen": 732268372, + "step": 7552 + }, + { + "epoch": 0.738365271802894, + "loss": 0.10905593633651733, + "loss_ce": 0.002702179830521345, + "loss_iou": 0.267578125, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 732268372, + "step": 7552 + }, + { + "epoch": 0.7384630426280798, + "grad_norm": 2.287231355384476, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 732365924, + "step": 7553 + }, + { + "epoch": 0.7384630426280798, + "loss": 0.06432417780160904, + "loss_ce": 0.01017836295068264, + "loss_iou": 0.2001953125, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 732365924, + "step": 7553 + }, + { + "epoch": 0.7385608134532655, + "grad_norm": 22.43659046643204, + "learning_rate": 5e-05, + "loss": 0.1229, + "num_input_tokens_seen": 732463684, + "step": 7554 + }, + { + "epoch": 0.7385608134532655, + "loss": 0.10765300691127777, + "loss_ce": 0.00857006199657917, + "loss_iou": 0.302734375, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 732463684, + "step": 7554 + }, + { + "epoch": 0.7386585842784513, + "grad_norm": 6.148031632626701, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 732560184, + "step": 7555 + }, + { + "epoch": 0.7386585842784513, + "loss": 0.08951011300086975, + "loss_ce": 0.002512127161026001, + "loss_iou": 0.259765625, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 732560184, + "step": 7555 + }, + { + "epoch": 0.7387563551036371, + "grad_norm": 8.240109316372314, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 732656180, + "step": 7556 + }, + { + "epoch": 0.7387563551036371, + "loss": 0.09730714559555054, + "loss_ce": 0.008500995114445686, + "loss_iou": 0.2451171875, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 732656180, + "step": 7556 + }, + { + "epoch": 0.7388541259288228, + "grad_norm": 4.357827860774232, + "learning_rate": 5e-05, + "loss": 0.1058, + "num_input_tokens_seen": 732753264, + "step": 7557 + }, + { + "epoch": 0.7388541259288228, + "loss": 0.11929450184106827, + "loss_ce": 0.005357131827622652, + "loss_iou": 0.2109375, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 732753264, + "step": 7557 + }, + { + "epoch": 0.7389518967540086, + "grad_norm": 2.483048515854955, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 732850812, + "step": 7558 + }, + { + "epoch": 0.7389518967540086, + "loss": 0.07167735695838928, + "loss_ce": 0.0046607558615505695, + "loss_iou": 0.2890625, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 732850812, + "step": 7558 + }, + { + "epoch": 0.7390496675791943, + "grad_norm": 26.738369304202944, + "learning_rate": 5e-05, + "loss": 0.1105, + "num_input_tokens_seen": 732948188, + "step": 7559 + }, + { + "epoch": 0.7390496675791943, + "loss": 0.12721718847751617, + "loss_ce": 0.003651513485237956, + "loss_iou": 0.255859375, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 732948188, + "step": 7559 + }, + { + "epoch": 0.7391474384043801, + "grad_norm": 7.179038939196772, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 733044848, + "step": 7560 + }, + { + "epoch": 0.7391474384043801, + "loss": 0.08242418617010117, + "loss_ce": 0.007122058887034655, + "loss_iou": 0.330078125, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 733044848, + "step": 7560 + }, + { + "epoch": 0.7392452092295659, + "grad_norm": 7.961215820196037, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 733140996, + "step": 7561 + }, + { + "epoch": 0.7392452092295659, + "loss": 0.064150370657444, + "loss_ce": 0.005220926366746426, + "loss_iou": 0.25390625, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 733140996, + "step": 7561 + }, + { + "epoch": 0.7393429800547516, + "grad_norm": 2.947082111485984, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 733237692, + "step": 7562 + }, + { + "epoch": 0.7393429800547516, + "loss": 0.0712382048368454, + "loss_ce": 0.006220508366823196, + "loss_iou": 0.234375, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 733237692, + "step": 7562 + }, + { + "epoch": 0.7394407508799374, + "grad_norm": 6.3959078790819035, + "learning_rate": 5e-05, + "loss": 0.1049, + "num_input_tokens_seen": 733334216, + "step": 7563 + }, + { + "epoch": 0.7394407508799374, + "loss": 0.11643818765878677, + "loss_ce": 0.010069167241454124, + "loss_iou": 0.2216796875, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 733334216, + "step": 7563 + }, + { + "epoch": 0.7395385217051232, + "grad_norm": 2.8251031429242484, + "learning_rate": 5e-05, + "loss": 0.0646, + "num_input_tokens_seen": 733431704, + "step": 7564 + }, + { + "epoch": 0.7395385217051232, + "loss": 0.06723636388778687, + "loss_ce": 0.003988686017692089, + "loss_iou": 0.2412109375, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 733431704, + "step": 7564 + }, + { + "epoch": 0.7396362925303089, + "grad_norm": 1.6486049965826377, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 733528240, + "step": 7565 + }, + { + "epoch": 0.7396362925303089, + "loss": 0.05967147648334503, + "loss_ce": 0.0061436425894498825, + "loss_iou": 0.2177734375, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 733528240, + "step": 7565 + }, + { + "epoch": 0.7397340633554947, + "grad_norm": 5.319599164067132, + "learning_rate": 5e-05, + "loss": 0.0866, + "num_input_tokens_seen": 733625100, + "step": 7566 + }, + { + "epoch": 0.7397340633554947, + "loss": 0.0685802549123764, + "loss_ce": 0.003791440511122346, + "loss_iou": 0.2421875, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 733625100, + "step": 7566 + }, + { + "epoch": 0.7398318341806804, + "grad_norm": 26.779653573864884, + "learning_rate": 5e-05, + "loss": 0.1019, + "num_input_tokens_seen": 733721980, + "step": 7567 + }, + { + "epoch": 0.7398318341806804, + "loss": 0.12489373236894608, + "loss_ce": 0.009567808359861374, + "loss_iou": 0.29296875, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 733721980, + "step": 7567 + }, + { + "epoch": 0.7399296050058662, + "grad_norm": 7.392469513521677, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 733818764, + "step": 7568 + }, + { + "epoch": 0.7399296050058662, + "loss": 0.06655550748109818, + "loss_ce": 0.005375394597649574, + "loss_iou": 0.326171875, + "loss_num": 0.01220703125, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 733818764, + "step": 7568 + }, + { + "epoch": 0.740027375831052, + "grad_norm": 11.51523778741479, + "learning_rate": 5e-05, + "loss": 0.0614, + "num_input_tokens_seen": 733915152, + "step": 7569 + }, + { + "epoch": 0.740027375831052, + "loss": 0.07108830660581589, + "loss_ce": 0.004224290139973164, + "loss_iou": 0.2578125, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 733915152, + "step": 7569 + }, + { + "epoch": 0.7401251466562377, + "grad_norm": 19.441061093327136, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 734011820, + "step": 7570 + }, + { + "epoch": 0.7401251466562377, + "loss": 0.11401881277561188, + "loss_ce": 0.007115739397704601, + "loss_iou": 0.341796875, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 734011820, + "step": 7570 + }, + { + "epoch": 0.7402229174814235, + "grad_norm": 10.957494327402358, + "learning_rate": 5e-05, + "loss": 0.0492, + "num_input_tokens_seen": 734108524, + "step": 7571 + }, + { + "epoch": 0.7402229174814235, + "loss": 0.04692920297384262, + "loss_ce": 0.006939730606973171, + "loss_iou": 0.287109375, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 734108524, + "step": 7571 + }, + { + "epoch": 0.7403206883066094, + "grad_norm": 8.123965163406485, + "learning_rate": 5e-05, + "loss": 0.0974, + "num_input_tokens_seen": 734205716, + "step": 7572 + }, + { + "epoch": 0.7403206883066094, + "loss": 0.07008951902389526, + "loss_ce": 0.006765545811504126, + "loss_iou": 0.279296875, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 734205716, + "step": 7572 + }, + { + "epoch": 0.740418459131795, + "grad_norm": 6.65849780614749, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 734303040, + "step": 7573 + }, + { + "epoch": 0.740418459131795, + "loss": 0.06638153642416, + "loss_ce": 0.004095159471035004, + "loss_iou": 0.2734375, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 734303040, + "step": 7573 + }, + { + "epoch": 0.7405162299569809, + "grad_norm": 11.183024118610733, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 734399816, + "step": 7574 + }, + { + "epoch": 0.7405162299569809, + "loss": 0.0381164513528347, + "loss_ce": 0.002105709398165345, + "loss_iou": 0.251953125, + "loss_num": 0.0072021484375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 734399816, + "step": 7574 + }, + { + "epoch": 0.7406140007821665, + "grad_norm": 12.032936012558988, + "learning_rate": 5e-05, + "loss": 0.1152, + "num_input_tokens_seen": 734496776, + "step": 7575 + }, + { + "epoch": 0.7406140007821665, + "loss": 0.12426348030567169, + "loss_ce": 0.006831837818026543, + "loss_iou": 0.271484375, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 734496776, + "step": 7575 + }, + { + "epoch": 0.7407117716073524, + "grad_norm": 10.023030941683245, + "learning_rate": 5e-05, + "loss": 0.0426, + "num_input_tokens_seen": 734593592, + "step": 7576 + }, + { + "epoch": 0.7407117716073524, + "loss": 0.0448525995016098, + "loss_ce": 0.004554136656224728, + "loss_iou": 0.328125, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 734593592, + "step": 7576 + }, + { + "epoch": 0.7408095424325382, + "grad_norm": 9.407848146665302, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 734690032, + "step": 7577 + }, + { + "epoch": 0.7408095424325382, + "loss": 0.03653249889612198, + "loss_ce": 0.002596954582259059, + "loss_iou": 0.31640625, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 734690032, + "step": 7577 + }, + { + "epoch": 0.7409073132577239, + "grad_norm": 14.465682499743973, + "learning_rate": 5e-05, + "loss": 0.0938, + "num_input_tokens_seen": 734787136, + "step": 7578 + }, + { + "epoch": 0.7409073132577239, + "loss": 0.101500004529953, + "loss_ce": 0.002836667001247406, + "loss_iou": 0.41015625, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 734787136, + "step": 7578 + }, + { + "epoch": 0.7410050840829097, + "grad_norm": 13.427995415835666, + "learning_rate": 5e-05, + "loss": 0.1202, + "num_input_tokens_seen": 734883972, + "step": 7579 + }, + { + "epoch": 0.7410050840829097, + "loss": 0.09412610530853271, + "loss_ce": 0.007288333028554916, + "loss_iou": 0.36328125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 734883972, + "step": 7579 + }, + { + "epoch": 0.7411028549080955, + "grad_norm": 5.852660844911325, + "learning_rate": 5e-05, + "loss": 0.0582, + "num_input_tokens_seen": 734981672, + "step": 7580 + }, + { + "epoch": 0.7411028549080955, + "loss": 0.05711175128817558, + "loss_ce": 0.006116877309978008, + "loss_iou": 0.3359375, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 734981672, + "step": 7580 + }, + { + "epoch": 0.7412006257332812, + "grad_norm": 7.7885892395335246, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 735079412, + "step": 7581 + }, + { + "epoch": 0.7412006257332812, + "loss": 0.07493309676647186, + "loss_ce": 0.0026522106491029263, + "loss_iou": 0.380859375, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 735079412, + "step": 7581 + }, + { + "epoch": 0.741298396558467, + "grad_norm": 17.493501676887234, + "learning_rate": 5e-05, + "loss": 0.0546, + "num_input_tokens_seen": 735176520, + "step": 7582 + }, + { + "epoch": 0.741298396558467, + "loss": 0.06745807826519012, + "loss_ce": 0.0040730638429522514, + "loss_iou": 0.322265625, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 735176520, + "step": 7582 + }, + { + "epoch": 0.7413961673836528, + "grad_norm": 6.388765107095204, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 735273900, + "step": 7583 + }, + { + "epoch": 0.7413961673836528, + "loss": 0.06145861744880676, + "loss_ce": 0.0038032846059650183, + "loss_iou": 0.2119140625, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 735273900, + "step": 7583 + }, + { + "epoch": 0.7414939382088385, + "grad_norm": 4.845765987783287, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 735370712, + "step": 7584 + }, + { + "epoch": 0.7414939382088385, + "loss": 0.07164815813302994, + "loss_ce": 0.006538905669003725, + "loss_iou": 0.1884765625, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 735370712, + "step": 7584 + }, + { + "epoch": 0.7415917090340243, + "grad_norm": 3.762408323714268, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 735467876, + "step": 7585 + }, + { + "epoch": 0.7415917090340243, + "loss": 0.0853818953037262, + "loss_ce": 0.00609723012894392, + "loss_iou": 0.328125, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 735467876, + "step": 7585 + }, + { + "epoch": 0.74168947985921, + "grad_norm": 2.4654161808357284, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 735565376, + "step": 7586 + }, + { + "epoch": 0.74168947985921, + "loss": 0.07007824629545212, + "loss_ce": 0.006212584674358368, + "loss_iou": 0.271484375, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 735565376, + "step": 7586 + }, + { + "epoch": 0.7417872506843958, + "grad_norm": 10.80637602075543, + "learning_rate": 5e-05, + "loss": 0.1145, + "num_input_tokens_seen": 735663084, + "step": 7587 + }, + { + "epoch": 0.7417872506843958, + "loss": 0.14100825786590576, + "loss_ce": 0.007722729817032814, + "loss_iou": 0.318359375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 735663084, + "step": 7587 + }, + { + "epoch": 0.7418850215095816, + "grad_norm": 11.16703495758067, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 735760976, + "step": 7588 + }, + { + "epoch": 0.7418850215095816, + "loss": 0.059802159667015076, + "loss_ce": 0.0020476439967751503, + "loss_iou": 0.30078125, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 735760976, + "step": 7588 + }, + { + "epoch": 0.7419827923347673, + "grad_norm": 14.777884288464495, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 735857924, + "step": 7589 + }, + { + "epoch": 0.7419827923347673, + "loss": 0.09294494241476059, + "loss_ce": 0.003589474828913808, + "loss_iou": 0.1748046875, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 735857924, + "step": 7589 + }, + { + "epoch": 0.7420805631599531, + "grad_norm": 5.047362980471169, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 735954136, + "step": 7590 + }, + { + "epoch": 0.7420805631599531, + "loss": 0.05848725140094757, + "loss_ce": 0.00757248979061842, + "loss_iou": 0.2109375, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 735954136, + "step": 7590 + }, + { + "epoch": 0.7421783339851389, + "grad_norm": 7.905828204002585, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 736051164, + "step": 7591 + }, + { + "epoch": 0.7421783339851389, + "loss": 0.07708539068698883, + "loss_ce": 0.005819214507937431, + "loss_iou": 0.29296875, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 736051164, + "step": 7591 + }, + { + "epoch": 0.7422761048103246, + "grad_norm": 32.5358258122713, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 736147320, + "step": 7592 + }, + { + "epoch": 0.7422761048103246, + "loss": 0.05432090163230896, + "loss_ce": 0.0037227594293653965, + "loss_iou": 0.287109375, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 736147320, + "step": 7592 + }, + { + "epoch": 0.7423738756355104, + "grad_norm": 24.709070876235177, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 736244920, + "step": 7593 + }, + { + "epoch": 0.7423738756355104, + "loss": 0.06602250039577484, + "loss_ce": 0.0070472839288413525, + "loss_iou": 0.279296875, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 736244920, + "step": 7593 + }, + { + "epoch": 0.7424716464606961, + "grad_norm": 4.02355084917981, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 736341608, + "step": 7594 + }, + { + "epoch": 0.7424716464606961, + "loss": 0.05858629569411278, + "loss_ce": 0.008484063670039177, + "loss_iou": 0.2109375, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 736341608, + "step": 7594 + }, + { + "epoch": 0.7425694172858819, + "grad_norm": 6.7455195587357695, + "learning_rate": 5e-05, + "loss": 0.0697, + "num_input_tokens_seen": 736438416, + "step": 7595 + }, + { + "epoch": 0.7425694172858819, + "loss": 0.09009453654289246, + "loss_ce": 0.005885093007236719, + "loss_iou": 0.275390625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 736438416, + "step": 7595 + }, + { + "epoch": 0.7426671881110677, + "grad_norm": 7.022142713879582, + "learning_rate": 5e-05, + "loss": 0.0928, + "num_input_tokens_seen": 736535288, + "step": 7596 + }, + { + "epoch": 0.7426671881110677, + "loss": 0.06135724484920502, + "loss_ce": 0.006692638620734215, + "loss_iou": 0.326171875, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 736535288, + "step": 7596 + }, + { + "epoch": 0.7427649589362534, + "grad_norm": 12.090747023276663, + "learning_rate": 5e-05, + "loss": 0.0975, + "num_input_tokens_seen": 736632040, + "step": 7597 + }, + { + "epoch": 0.7427649589362534, + "loss": 0.11625993251800537, + "loss_ce": 0.00680864043533802, + "loss_iou": 0.306640625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 736632040, + "step": 7597 + }, + { + "epoch": 0.7428627297614392, + "grad_norm": 11.737364074565024, + "learning_rate": 5e-05, + "loss": 0.0838, + "num_input_tokens_seen": 736729484, + "step": 7598 + }, + { + "epoch": 0.7428627297614392, + "loss": 0.12082205712795258, + "loss_ce": 0.010775672271847725, + "loss_iou": 0.3203125, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 736729484, + "step": 7598 + }, + { + "epoch": 0.742960500586625, + "grad_norm": 6.659934646223716, + "learning_rate": 5e-05, + "loss": 0.064, + "num_input_tokens_seen": 736826300, + "step": 7599 + }, + { + "epoch": 0.742960500586625, + "loss": 0.06008543819189072, + "loss_ce": 0.003498220117762685, + "loss_iou": 0.283203125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 736826300, + "step": 7599 + }, + { + "epoch": 0.7430582714118107, + "grad_norm": 5.777603675893686, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 736923728, + "step": 7600 + }, + { + "epoch": 0.7430582714118107, + "loss": 0.050701454281806946, + "loss_ce": 0.0062678600661456585, + "loss_iou": 0.25, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 736923728, + "step": 7600 + }, + { + "epoch": 0.7431560422369965, + "grad_norm": 15.511584761210319, + "learning_rate": 5e-05, + "loss": 0.1169, + "num_input_tokens_seen": 737020904, + "step": 7601 + }, + { + "epoch": 0.7431560422369965, + "loss": 0.08475509285926819, + "loss_ce": 0.0072862240485847, + "loss_iou": 0.427734375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 737020904, + "step": 7601 + }, + { + "epoch": 0.7432538130621823, + "grad_norm": 10.813508015318662, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 737118332, + "step": 7602 + }, + { + "epoch": 0.7432538130621823, + "loss": 0.060782380402088165, + "loss_ce": 0.0022572961170226336, + "loss_iou": 0.423828125, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 737118332, + "step": 7602 + }, + { + "epoch": 0.743351583887368, + "grad_norm": 12.259939089634974, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 737215368, + "step": 7603 + }, + { + "epoch": 0.743351583887368, + "loss": 0.10977092385292053, + "loss_ce": 0.006316336803138256, + "loss_iou": 0.345703125, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 737215368, + "step": 7603 + }, + { + "epoch": 0.7434493547125538, + "grad_norm": 5.968748101651973, + "learning_rate": 5e-05, + "loss": 0.0908, + "num_input_tokens_seen": 737311532, + "step": 7604 + }, + { + "epoch": 0.7434493547125538, + "loss": 0.07085657119750977, + "loss_ce": 0.004633431322872639, + "loss_iou": 0.240234375, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 737311532, + "step": 7604 + }, + { + "epoch": 0.7435471255377395, + "grad_norm": 6.375419829329944, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 737407900, + "step": 7605 + }, + { + "epoch": 0.7435471255377395, + "loss": 0.03572684898972511, + "loss_ce": 0.0051634954288601875, + "loss_iou": 0.1552734375, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 737407900, + "step": 7605 + }, + { + "epoch": 0.7436448963629253, + "grad_norm": 5.929136965690271, + "learning_rate": 5e-05, + "loss": 0.0658, + "num_input_tokens_seen": 737504884, + "step": 7606 + }, + { + "epoch": 0.7436448963629253, + "loss": 0.06215811148285866, + "loss_ce": 0.004559995606541634, + "loss_iou": 0.291015625, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 737504884, + "step": 7606 + }, + { + "epoch": 0.7437426671881111, + "grad_norm": 7.8031079649027655, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 737602140, + "step": 7607 + }, + { + "epoch": 0.7437426671881111, + "loss": 0.06222999468445778, + "loss_ce": 0.003254776122048497, + "loss_iou": 0.306640625, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 737602140, + "step": 7607 + }, + { + "epoch": 0.7438404380132968, + "grad_norm": 4.27366272936189, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 737699196, + "step": 7608 + }, + { + "epoch": 0.7438404380132968, + "loss": 0.07506202906370163, + "loss_ce": 0.007496107369661331, + "loss_iou": 0.32421875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 737699196, + "step": 7608 + }, + { + "epoch": 0.7439382088384826, + "grad_norm": 22.53659812600765, + "learning_rate": 5e-05, + "loss": 0.1005, + "num_input_tokens_seen": 737796164, + "step": 7609 + }, + { + "epoch": 0.7439382088384826, + "loss": 0.10835205018520355, + "loss_ce": 0.006896364502608776, + "loss_iou": 0.337890625, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 737796164, + "step": 7609 + }, + { + "epoch": 0.7440359796636684, + "grad_norm": 14.874090786073495, + "learning_rate": 5e-05, + "loss": 0.0812, + "num_input_tokens_seen": 737893412, + "step": 7610 + }, + { + "epoch": 0.7440359796636684, + "loss": 0.11393770575523376, + "loss_ce": 0.007324554026126862, + "loss_iou": 0.341796875, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 737893412, + "step": 7610 + }, + { + "epoch": 0.7441337504888541, + "grad_norm": 4.506675396477377, + "learning_rate": 5e-05, + "loss": 0.0925, + "num_input_tokens_seen": 737990252, + "step": 7611 + }, + { + "epoch": 0.7441337504888541, + "loss": 0.08784190565347672, + "loss_ce": 0.0077790371142327785, + "loss_iou": 0.392578125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 737990252, + "step": 7611 + }, + { + "epoch": 0.7442315213140399, + "grad_norm": 10.062248461515336, + "learning_rate": 5e-05, + "loss": 0.0775, + "num_input_tokens_seen": 738086744, + "step": 7612 + }, + { + "epoch": 0.7442315213140399, + "loss": 0.05653886869549751, + "loss_ce": 0.0035145748406648636, + "loss_iou": 0.310546875, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 738086744, + "step": 7612 + }, + { + "epoch": 0.7443292921392256, + "grad_norm": 16.462740040983388, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 738183756, + "step": 7613 + }, + { + "epoch": 0.7443292921392256, + "loss": 0.11053281277418137, + "loss_ce": 0.0039043917786329985, + "loss_iou": 0.2490234375, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 738183756, + "step": 7613 + }, + { + "epoch": 0.7444270629644114, + "grad_norm": 23.258449366820567, + "learning_rate": 5e-05, + "loss": 0.0978, + "num_input_tokens_seen": 738280324, + "step": 7614 + }, + { + "epoch": 0.7444270629644114, + "loss": 0.08165764808654785, + "loss_ce": 0.005073790438473225, + "loss_iou": 0.27734375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 738280324, + "step": 7614 + }, + { + "epoch": 0.7445248337895972, + "grad_norm": 7.51716748581738, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 738376744, + "step": 7615 + }, + { + "epoch": 0.7445248337895972, + "loss": 0.08828887343406677, + "loss_ce": 0.005090329796075821, + "loss_iou": 0.15625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 738376744, + "step": 7615 + }, + { + "epoch": 0.7446226046147829, + "grad_norm": 5.915745368737212, + "learning_rate": 5e-05, + "loss": 0.047, + "num_input_tokens_seen": 738473632, + "step": 7616 + }, + { + "epoch": 0.7446226046147829, + "loss": 0.052362728863954544, + "loss_ce": 0.0038855550810694695, + "loss_iou": 0.25, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 738473632, + "step": 7616 + }, + { + "epoch": 0.7447203754399687, + "grad_norm": 45.360261476151095, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 738570688, + "step": 7617 + }, + { + "epoch": 0.7447203754399687, + "loss": 0.05258209630846977, + "loss_ce": 0.004303289577364922, + "loss_iou": 0.357421875, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 738570688, + "step": 7617 + }, + { + "epoch": 0.7448181462651545, + "grad_norm": 13.8556556662667, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 738667004, + "step": 7618 + }, + { + "epoch": 0.7448181462651545, + "loss": 0.08616158366203308, + "loss_ce": 0.005747766233980656, + "loss_iou": 0.357421875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 738667004, + "step": 7618 + }, + { + "epoch": 0.7449159170903402, + "grad_norm": 16.668423816867612, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 738763656, + "step": 7619 + }, + { + "epoch": 0.7449159170903402, + "loss": 0.054272785782814026, + "loss_ce": 0.0021182436030358076, + "loss_iou": 0.28125, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 738763656, + "step": 7619 + }, + { + "epoch": 0.745013687915526, + "grad_norm": 5.526177833952869, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 738859996, + "step": 7620 + }, + { + "epoch": 0.745013687915526, + "loss": 0.06361867487430573, + "loss_ce": 0.003239645157009363, + "loss_iou": 0.1845703125, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 738859996, + "step": 7620 + }, + { + "epoch": 0.7451114587407117, + "grad_norm": 5.465290464161402, + "learning_rate": 5e-05, + "loss": 0.0565, + "num_input_tokens_seen": 738957256, + "step": 7621 + }, + { + "epoch": 0.7451114587407117, + "loss": 0.06235097721219063, + "loss_ce": 0.0023991945199668407, + "loss_iou": 0.39453125, + "loss_num": 0.011962890625, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 738957256, + "step": 7621 + }, + { + "epoch": 0.7452092295658975, + "grad_norm": 4.5738814531067264, + "learning_rate": 5e-05, + "loss": 0.0911, + "num_input_tokens_seen": 739054348, + "step": 7622 + }, + { + "epoch": 0.7452092295658975, + "loss": 0.08030734956264496, + "loss_ce": 0.0025790773797780275, + "loss_iou": 0.3828125, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 739054348, + "step": 7622 + }, + { + "epoch": 0.7453070003910833, + "grad_norm": 9.268119843436383, + "learning_rate": 5e-05, + "loss": 0.0913, + "num_input_tokens_seen": 739151876, + "step": 7623 + }, + { + "epoch": 0.7453070003910833, + "loss": 0.0744919627904892, + "loss_ce": 0.005468830466270447, + "loss_iou": 0.380859375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 739151876, + "step": 7623 + }, + { + "epoch": 0.745404771216269, + "grad_norm": 3.0309915559746323, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 739248624, + "step": 7624 + }, + { + "epoch": 0.745404771216269, + "loss": 0.11149251461029053, + "loss_ce": 0.005703337490558624, + "loss_iou": 0.279296875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 739248624, + "step": 7624 + }, + { + "epoch": 0.7455025420414548, + "grad_norm": 10.260810653685018, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 739345568, + "step": 7625 + }, + { + "epoch": 0.7455025420414548, + "loss": 0.04143420234322548, + "loss_ce": 0.003638181835412979, + "loss_iou": 0.2158203125, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 739345568, + "step": 7625 + }, + { + "epoch": 0.7456003128666406, + "grad_norm": 4.8285866980893095, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 739443084, + "step": 7626 + }, + { + "epoch": 0.7456003128666406, + "loss": 0.0764528289437294, + "loss_ce": 0.011450384743511677, + "loss_iou": 0.265625, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 739443084, + "step": 7626 + }, + { + "epoch": 0.7456980836918263, + "grad_norm": 9.967875499617605, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 739539792, + "step": 7627 + }, + { + "epoch": 0.7456980836918263, + "loss": 0.0672910064458847, + "loss_ce": 0.00971577875316143, + "loss_iou": 0.341796875, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 739539792, + "step": 7627 + }, + { + "epoch": 0.7457958545170121, + "grad_norm": 3.9472709202384495, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 739637012, + "step": 7628 + }, + { + "epoch": 0.7457958545170121, + "loss": 0.09965319186449051, + "loss_ce": 0.0032252720557153225, + "loss_iou": 0.3671875, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 739637012, + "step": 7628 + }, + { + "epoch": 0.7458936253421979, + "grad_norm": 6.650657191098243, + "learning_rate": 5e-05, + "loss": 0.0656, + "num_input_tokens_seen": 739733892, + "step": 7629 + }, + { + "epoch": 0.7458936253421979, + "loss": 0.04866329953074455, + "loss_ce": 0.010607878677546978, + "loss_iou": 0.33203125, + "loss_num": 0.007598876953125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 739733892, + "step": 7629 + }, + { + "epoch": 0.7459913961673836, + "grad_norm": 19.857945361581205, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 739831796, + "step": 7630 + }, + { + "epoch": 0.7459913961673836, + "loss": 0.07605123519897461, + "loss_ce": 0.0030150441452860832, + "loss_iou": 0.318359375, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 739831796, + "step": 7630 + }, + { + "epoch": 0.7460891669925694, + "grad_norm": 22.933536934282625, + "learning_rate": 5e-05, + "loss": 0.0851, + "num_input_tokens_seen": 739928480, + "step": 7631 + }, + { + "epoch": 0.7460891669925694, + "loss": 0.10611683130264282, + "loss_ce": 0.006843152921646833, + "loss_iou": 0.328125, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 739928480, + "step": 7631 + }, + { + "epoch": 0.7461869378177551, + "grad_norm": 3.527876318330687, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 740025844, + "step": 7632 + }, + { + "epoch": 0.7461869378177551, + "loss": 0.04456077516078949, + "loss_ce": 0.006627428345382214, + "loss_iou": 0.37890625, + "loss_num": 0.007598876953125, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 740025844, + "step": 7632 + }, + { + "epoch": 0.7462847086429409, + "grad_norm": 9.589140686777736, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 740122452, + "step": 7633 + }, + { + "epoch": 0.7462847086429409, + "loss": 0.06516426801681519, + "loss_ce": 0.0034958713222295046, + "loss_iou": 0.359375, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 740122452, + "step": 7633 + }, + { + "epoch": 0.7463824794681267, + "grad_norm": 19.200738534531688, + "learning_rate": 5e-05, + "loss": 0.0461, + "num_input_tokens_seen": 740220060, + "step": 7634 + }, + { + "epoch": 0.7463824794681267, + "loss": 0.03893110901117325, + "loss_ce": 0.0048429714515805244, + "loss_iou": 0.32421875, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 740220060, + "step": 7634 + }, + { + "epoch": 0.7464802502933124, + "grad_norm": 8.181408326008711, + "learning_rate": 5e-05, + "loss": 0.0911, + "num_input_tokens_seen": 740316872, + "step": 7635 + }, + { + "epoch": 0.7464802502933124, + "loss": 0.08647210896015167, + "loss_ce": 0.009018495678901672, + "loss_iou": 0.3125, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 740316872, + "step": 7635 + }, + { + "epoch": 0.7465780211184982, + "grad_norm": 9.102595017704912, + "learning_rate": 5e-05, + "loss": 0.0531, + "num_input_tokens_seen": 740413352, + "step": 7636 + }, + { + "epoch": 0.7465780211184982, + "loss": 0.06837227940559387, + "loss_ce": 0.004117522854357958, + "loss_iou": 0.287109375, + "loss_num": 0.0128173828125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 740413352, + "step": 7636 + }, + { + "epoch": 0.746675791943684, + "grad_norm": 7.655218736135314, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 740510060, + "step": 7637 + }, + { + "epoch": 0.746675791943684, + "loss": 0.06582465767860413, + "loss_ce": 0.004751353524625301, + "loss_iou": 0.2490234375, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 740510060, + "step": 7637 + }, + { + "epoch": 0.7467735627688697, + "grad_norm": 7.9457827303842, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 740606632, + "step": 7638 + }, + { + "epoch": 0.7467735627688697, + "loss": 0.08096520602703094, + "loss_ce": 0.006517576985061169, + "loss_iou": 0.2216796875, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 740606632, + "step": 7638 + }, + { + "epoch": 0.7468713335940556, + "grad_norm": 26.880531994180437, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 740703404, + "step": 7639 + }, + { + "epoch": 0.7468713335940556, + "loss": 0.11048644036054611, + "loss_ce": 0.006055290345102549, + "loss_iou": 0.265625, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 740703404, + "step": 7639 + }, + { + "epoch": 0.7469691044192412, + "grad_norm": 34.90663863484252, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 740801224, + "step": 7640 + }, + { + "epoch": 0.7469691044192412, + "loss": 0.08038652688264847, + "loss_ce": 0.009997730143368244, + "loss_iou": 0.27734375, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 740801224, + "step": 7640 + }, + { + "epoch": 0.747066875244427, + "grad_norm": 10.204882735763423, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 740898356, + "step": 7641 + }, + { + "epoch": 0.747066875244427, + "loss": 0.05289439857006073, + "loss_ce": 0.004310416057705879, + "loss_iou": 0.419921875, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 740898356, + "step": 7641 + }, + { + "epoch": 0.7471646460696129, + "grad_norm": 5.48913831333235, + "learning_rate": 5e-05, + "loss": 0.0986, + "num_input_tokens_seen": 740995288, + "step": 7642 + }, + { + "epoch": 0.7471646460696129, + "loss": 0.11286009103059769, + "loss_ce": 0.006895432248711586, + "loss_iou": 0.30078125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 740995288, + "step": 7642 + }, + { + "epoch": 0.7472624168947986, + "grad_norm": 5.089550991355098, + "learning_rate": 5e-05, + "loss": 0.0462, + "num_input_tokens_seen": 741092580, + "step": 7643 + }, + { + "epoch": 0.7472624168947986, + "loss": 0.04096974432468414, + "loss_ce": 0.004974262323230505, + "loss_iou": 0.291015625, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 741092580, + "step": 7643 + }, + { + "epoch": 0.7473601877199844, + "grad_norm": 18.66117417047635, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 741189252, + "step": 7644 + }, + { + "epoch": 0.7473601877199844, + "loss": 0.07757529616355896, + "loss_ce": 0.004996866919100285, + "loss_iou": 0.1748046875, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 741189252, + "step": 7644 + }, + { + "epoch": 0.7474579585451702, + "grad_norm": 2.2364092889162355, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 741286712, + "step": 7645 + }, + { + "epoch": 0.7474579585451702, + "loss": 0.0756600946187973, + "loss_ce": 0.0061257947236299515, + "loss_iou": 0.30859375, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 741286712, + "step": 7645 + }, + { + "epoch": 0.7475557293703559, + "grad_norm": 12.936154501939923, + "learning_rate": 5e-05, + "loss": 0.0547, + "num_input_tokens_seen": 741383772, + "step": 7646 + }, + { + "epoch": 0.7475557293703559, + "loss": 0.05414317920804024, + "loss_ce": 0.004979362711310387, + "loss_iou": 0.30859375, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 741383772, + "step": 7646 + }, + { + "epoch": 0.7476535001955417, + "grad_norm": 9.578397173872334, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 741479748, + "step": 7647 + }, + { + "epoch": 0.7476535001955417, + "loss": 0.04250578582286835, + "loss_ce": 0.0029206713661551476, + "loss_iou": 0.322265625, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 741479748, + "step": 7647 + }, + { + "epoch": 0.7477512710207275, + "grad_norm": 32.55036617849942, + "learning_rate": 5e-05, + "loss": 0.0974, + "num_input_tokens_seen": 741576800, + "step": 7648 + }, + { + "epoch": 0.7477512710207275, + "loss": 0.12501874566078186, + "loss_ce": 0.010699888691306114, + "loss_iou": 0.28125, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 741576800, + "step": 7648 + }, + { + "epoch": 0.7478490418459132, + "grad_norm": 4.127698221871847, + "learning_rate": 5e-05, + "loss": 0.0519, + "num_input_tokens_seen": 741673524, + "step": 7649 + }, + { + "epoch": 0.7478490418459132, + "loss": 0.05900435894727707, + "loss_ce": 0.009275966323912144, + "loss_iou": 0.328125, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 741673524, + "step": 7649 + }, + { + "epoch": 0.747946812671099, + "grad_norm": 13.651166782039686, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 741769940, + "step": 7650 + }, + { + "epoch": 0.747946812671099, + "loss": 0.1091671735048294, + "loss_ce": 0.012304379604756832, + "loss_iou": 0.30078125, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 741769940, + "step": 7650 + }, + { + "epoch": 0.7480445834962847, + "grad_norm": 5.245355038016043, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 741866932, + "step": 7651 + }, + { + "epoch": 0.7480445834962847, + "loss": 0.04812419414520264, + "loss_ce": 0.004102584905922413, + "loss_iou": 0.3359375, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 741866932, + "step": 7651 + }, + { + "epoch": 0.7481423543214705, + "grad_norm": 6.410444189916358, + "learning_rate": 5e-05, + "loss": 0.094, + "num_input_tokens_seen": 741964488, + "step": 7652 + }, + { + "epoch": 0.7481423543214705, + "loss": 0.1065988838672638, + "loss_ce": 0.009064706973731518, + "loss_iou": 0.328125, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 741964488, + "step": 7652 + }, + { + "epoch": 0.7482401251466563, + "grad_norm": 3.572591058892519, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 742061220, + "step": 7653 + }, + { + "epoch": 0.7482401251466563, + "loss": 0.07499992847442627, + "loss_ce": 0.003970264457166195, + "loss_iou": 0.35546875, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 742061220, + "step": 7653 + }, + { + "epoch": 0.748337895971842, + "grad_norm": 4.699967328097301, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 742158624, + "step": 7654 + }, + { + "epoch": 0.748337895971842, + "loss": 0.08442707359790802, + "loss_ce": 0.006271558813750744, + "loss_iou": 0.25, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 742158624, + "step": 7654 + }, + { + "epoch": 0.7484356667970278, + "grad_norm": 2.5113139368013853, + "learning_rate": 5e-05, + "loss": 0.0867, + "num_input_tokens_seen": 742255972, + "step": 7655 + }, + { + "epoch": 0.7484356667970278, + "loss": 0.06484376639127731, + "loss_ce": 0.0021225139498710632, + "loss_iou": 0.33203125, + "loss_num": 0.0125732421875, + "loss_xval": 0.0625, + "num_input_tokens_seen": 742255972, + "step": 7655 + }, + { + "epoch": 0.7485334376222136, + "grad_norm": 15.75198172077368, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 742353276, + "step": 7656 + }, + { + "epoch": 0.7485334376222136, + "loss": 0.061686623841524124, + "loss_ce": 0.00548850791528821, + "loss_iou": 0.349609375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 742353276, + "step": 7656 + }, + { + "epoch": 0.7486312084473993, + "grad_norm": 19.706076649207997, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 742450292, + "step": 7657 + }, + { + "epoch": 0.7486312084473993, + "loss": 0.08891517668962479, + "loss_ce": 0.0040076421573758125, + "loss_iou": 0.333984375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 742450292, + "step": 7657 + }, + { + "epoch": 0.7487289792725851, + "grad_norm": 20.323268048845, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 742546704, + "step": 7658 + }, + { + "epoch": 0.7487289792725851, + "loss": 0.08153485506772995, + "loss_ce": 0.0038981379475444555, + "loss_iou": 0.306640625, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 742546704, + "step": 7658 + }, + { + "epoch": 0.7488267500977708, + "grad_norm": 7.330513599144851, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 742643832, + "step": 7659 + }, + { + "epoch": 0.7488267500977708, + "loss": 0.09549833834171295, + "loss_ce": 0.009225143119692802, + "loss_iou": 0.314453125, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 742643832, + "step": 7659 + }, + { + "epoch": 0.7489245209229566, + "grad_norm": 10.24597535393185, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 742740144, + "step": 7660 + }, + { + "epoch": 0.7489245209229566, + "loss": 0.08426119387149811, + "loss_ce": 0.00471713300794363, + "loss_iou": 0.30859375, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 742740144, + "step": 7660 + }, + { + "epoch": 0.7490222917481424, + "grad_norm": 2.4591915582213333, + "learning_rate": 5e-05, + "loss": 0.038, + "num_input_tokens_seen": 742837684, + "step": 7661 + }, + { + "epoch": 0.7490222917481424, + "loss": 0.04050024598836899, + "loss_ce": 0.0029178448021411896, + "loss_iou": 0.302734375, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 742837684, + "step": 7661 + }, + { + "epoch": 0.7491200625733281, + "grad_norm": 9.509623498552543, + "learning_rate": 5e-05, + "loss": 0.0596, + "num_input_tokens_seen": 742935272, + "step": 7662 + }, + { + "epoch": 0.7491200625733281, + "loss": 0.056019291281700134, + "loss_ce": 0.00978515949100256, + "loss_iou": 0.23046875, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 742935272, + "step": 7662 + }, + { + "epoch": 0.7492178333985139, + "grad_norm": 5.403923933209729, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 743032148, + "step": 7663 + }, + { + "epoch": 0.7492178333985139, + "loss": 0.10175123810768127, + "loss_ce": 0.008001233451068401, + "loss_iou": 0.25, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 743032148, + "step": 7663 + }, + { + "epoch": 0.7493156042236997, + "grad_norm": 38.46055634670301, + "learning_rate": 5e-05, + "loss": 0.0879, + "num_input_tokens_seen": 743129144, + "step": 7664 + }, + { + "epoch": 0.7493156042236997, + "loss": 0.12018051743507385, + "loss_ce": 0.008333589881658554, + "loss_iou": 0.375, + "loss_num": 0.0224609375, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 743129144, + "step": 7664 + }, + { + "epoch": 0.7494133750488854, + "grad_norm": 16.947416119701902, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 743226656, + "step": 7665 + }, + { + "epoch": 0.7494133750488854, + "loss": 0.04687470942735672, + "loss_ce": 0.002410599496215582, + "loss_iou": 0.306640625, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 743226656, + "step": 7665 + }, + { + "epoch": 0.7495111458740712, + "grad_norm": 7.044982265297825, + "learning_rate": 5e-05, + "loss": 0.0827, + "num_input_tokens_seen": 743323488, + "step": 7666 + }, + { + "epoch": 0.7495111458740712, + "loss": 0.08057081699371338, + "loss_ce": 0.010899187996983528, + "loss_iou": 0.271484375, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 743323488, + "step": 7666 + }, + { + "epoch": 0.7496089166992569, + "grad_norm": 4.45937383410954, + "learning_rate": 5e-05, + "loss": 0.0536, + "num_input_tokens_seen": 743420152, + "step": 7667 + }, + { + "epoch": 0.7496089166992569, + "loss": 0.05051931366324425, + "loss_ce": 0.0065091513097286224, + "loss_iou": 0.23046875, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 743420152, + "step": 7667 + }, + { + "epoch": 0.7497066875244427, + "grad_norm": 5.97262952994376, + "learning_rate": 5e-05, + "loss": 0.1157, + "num_input_tokens_seen": 743517528, + "step": 7668 + }, + { + "epoch": 0.7497066875244427, + "loss": 0.10324585437774658, + "loss_ce": 0.004391794558614492, + "loss_iou": 0.265625, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 743517528, + "step": 7668 + }, + { + "epoch": 0.7498044583496285, + "grad_norm": 6.879156072020807, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 743614860, + "step": 7669 + }, + { + "epoch": 0.7498044583496285, + "loss": 0.05774347484111786, + "loss_ce": 0.0056652287021279335, + "loss_iou": 0.3359375, + "loss_num": 0.01043701171875, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 743614860, + "step": 7669 + }, + { + "epoch": 0.7499022291748142, + "grad_norm": 6.783034963590525, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 743712020, + "step": 7670 + }, + { + "epoch": 0.7499022291748142, + "loss": 0.06069992482662201, + "loss_ce": 0.005840766243636608, + "loss_iou": 0.318359375, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 743712020, + "step": 7670 + }, + { + "epoch": 0.75, + "grad_norm": 14.89905873207417, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 743809300, + "step": 7671 + }, + { + "epoch": 0.75, + "loss": 0.11376036703586578, + "loss_ce": 0.005594632588326931, + "loss_iou": 0.2314453125, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 743809300, + "step": 7671 + }, + { + "epoch": 0.7500977708251858, + "grad_norm": 14.799988078070395, + "learning_rate": 5e-05, + "loss": 0.1233, + "num_input_tokens_seen": 743906288, + "step": 7672 + }, + { + "epoch": 0.7500977708251858, + "loss": 0.1240493506193161, + "loss_ce": 0.003695647232234478, + "loss_iou": 0.26953125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 743906288, + "step": 7672 + }, + { + "epoch": 0.7501955416503715, + "grad_norm": 9.913905379868272, + "learning_rate": 5e-05, + "loss": 0.0763, + "num_input_tokens_seen": 744003468, + "step": 7673 + }, + { + "epoch": 0.7501955416503715, + "loss": 0.0694069042801857, + "loss_ce": 0.0069984570145606995, + "loss_iou": 0.26953125, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 744003468, + "step": 7673 + }, + { + "epoch": 0.7502933124755573, + "grad_norm": 6.03188883037385, + "learning_rate": 5e-05, + "loss": 0.0742, + "num_input_tokens_seen": 744100580, + "step": 7674 + }, + { + "epoch": 0.7502933124755573, + "loss": 0.06916414946317673, + "loss_ce": 0.005748620256781578, + "loss_iou": 0.296875, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 744100580, + "step": 7674 + }, + { + "epoch": 0.7503910833007431, + "grad_norm": 5.799259131267649, + "learning_rate": 5e-05, + "loss": 0.1033, + "num_input_tokens_seen": 744196500, + "step": 7675 + }, + { + "epoch": 0.7503910833007431, + "loss": 0.08041895180940628, + "loss_ce": 0.005418194457888603, + "loss_iou": 0.1943359375, + "loss_num": 0.01495361328125, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 744196500, + "step": 7675 + }, + { + "epoch": 0.7504888541259288, + "grad_norm": 3.9502450229753845, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 744293044, + "step": 7676 + }, + { + "epoch": 0.7504888541259288, + "loss": 0.08176325261592865, + "loss_ce": 0.0062475064769387245, + "loss_iou": 0.322265625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 744293044, + "step": 7676 + }, + { + "epoch": 0.7505866249511146, + "grad_norm": 10.208080133059486, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 744389680, + "step": 7677 + }, + { + "epoch": 0.7505866249511146, + "loss": 0.06457603722810745, + "loss_ce": 0.006295088212937117, + "loss_iou": 0.2490234375, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 744389680, + "step": 7677 + }, + { + "epoch": 0.7506843957763003, + "grad_norm": 8.157351174577915, + "learning_rate": 5e-05, + "loss": 0.0387, + "num_input_tokens_seen": 744487012, + "step": 7678 + }, + { + "epoch": 0.7506843957763003, + "loss": 0.04684486612677574, + "loss_ce": 0.003075027372688055, + "loss_iou": 0.310546875, + "loss_num": 0.0087890625, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 744487012, + "step": 7678 + }, + { + "epoch": 0.7507821666014861, + "grad_norm": 4.196844782317662, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 744583600, + "step": 7679 + }, + { + "epoch": 0.7507821666014861, + "loss": 0.06054280698299408, + "loss_ce": 0.004955039359629154, + "loss_iou": 0.19140625, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 744583600, + "step": 7679 + }, + { + "epoch": 0.7508799374266719, + "grad_norm": 10.897047898545, + "learning_rate": 5e-05, + "loss": 0.1091, + "num_input_tokens_seen": 744680792, + "step": 7680 + }, + { + "epoch": 0.7508799374266719, + "loss": 0.11378058791160583, + "loss_ce": 0.003879156894981861, + "loss_iou": 0.28515625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 744680792, + "step": 7680 + }, + { + "epoch": 0.7509777082518576, + "grad_norm": 4.087387505597775, + "learning_rate": 5e-05, + "loss": 0.0633, + "num_input_tokens_seen": 744778268, + "step": 7681 + }, + { + "epoch": 0.7509777082518576, + "loss": 0.08326499164104462, + "loss_ce": 0.005658791866153479, + "loss_iou": 0.25390625, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 744778268, + "step": 7681 + }, + { + "epoch": 0.7510754790770434, + "grad_norm": 2.6872272795473218, + "learning_rate": 5e-05, + "loss": 0.1015, + "num_input_tokens_seen": 744875760, + "step": 7682 + }, + { + "epoch": 0.7510754790770434, + "loss": 0.12090149521827698, + "loss_ce": 0.0027984699700027704, + "loss_iou": 0.2109375, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 744875760, + "step": 7682 + }, + { + "epoch": 0.7511732499022292, + "grad_norm": 6.671731324615797, + "learning_rate": 5e-05, + "loss": 0.0481, + "num_input_tokens_seen": 744973324, + "step": 7683 + }, + { + "epoch": 0.7511732499022292, + "loss": 0.057098835706710815, + "loss_ce": 0.007420029491186142, + "loss_iou": 0.26953125, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 744973324, + "step": 7683 + }, + { + "epoch": 0.7512710207274149, + "grad_norm": 4.519895652318024, + "learning_rate": 5e-05, + "loss": 0.0689, + "num_input_tokens_seen": 745070556, + "step": 7684 + }, + { + "epoch": 0.7512710207274149, + "loss": 0.06454253941774368, + "loss_ce": 0.0037820409052073956, + "loss_iou": 0.31640625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 745070556, + "step": 7684 + }, + { + "epoch": 0.7513687915526007, + "grad_norm": 16.05399179953538, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 745168184, + "step": 7685 + }, + { + "epoch": 0.7513687915526007, + "loss": 0.05753397196531296, + "loss_ce": 0.0016181416576728225, + "loss_iou": 0.349609375, + "loss_num": 0.01123046875, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 745168184, + "step": 7685 + }, + { + "epoch": 0.7514665623777864, + "grad_norm": 6.0992557265617116, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 745265396, + "step": 7686 + }, + { + "epoch": 0.7514665623777864, + "loss": 0.09597181528806686, + "loss_ce": 0.004220722708851099, + "loss_iou": 0.3828125, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 745265396, + "step": 7686 + }, + { + "epoch": 0.7515643332029722, + "grad_norm": 17.027214988504618, + "learning_rate": 5e-05, + "loss": 0.0559, + "num_input_tokens_seen": 745362588, + "step": 7687 + }, + { + "epoch": 0.7515643332029722, + "loss": 0.06276524066925049, + "loss_ce": 0.00150120269972831, + "loss_iou": 0.34375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 745362588, + "step": 7687 + }, + { + "epoch": 0.751662104028158, + "grad_norm": 14.435335576933811, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 745458688, + "step": 7688 + }, + { + "epoch": 0.751662104028158, + "loss": 0.12834402918815613, + "loss_ce": 0.005571815650910139, + "loss_iou": 0.328125, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 745458688, + "step": 7688 + }, + { + "epoch": 0.7517598748533437, + "grad_norm": 6.887768412654133, + "learning_rate": 5e-05, + "loss": 0.0867, + "num_input_tokens_seen": 745554992, + "step": 7689 + }, + { + "epoch": 0.7517598748533437, + "loss": 0.07394636422395706, + "loss_ce": 0.005266555119305849, + "loss_iou": 0.298828125, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 745554992, + "step": 7689 + }, + { + "epoch": 0.7518576456785295, + "grad_norm": 10.891079892315819, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 745651252, + "step": 7690 + }, + { + "epoch": 0.7518576456785295, + "loss": 0.04368025064468384, + "loss_ce": 0.005022106226533651, + "loss_iou": 0.2294921875, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 745651252, + "step": 7690 + }, + { + "epoch": 0.7519554165037153, + "grad_norm": 2.2613780549795086, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 745747608, + "step": 7691 + }, + { + "epoch": 0.7519554165037153, + "loss": 0.04647017642855644, + "loss_ce": 0.006244193762540817, + "loss_iou": 0.2177734375, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 745747608, + "step": 7691 + }, + { + "epoch": 0.752053187328901, + "grad_norm": 8.726870567652613, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 745844760, + "step": 7692 + }, + { + "epoch": 0.752053187328901, + "loss": 0.07337503135204315, + "loss_ce": 0.005030919797718525, + "loss_iou": 0.302734375, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 745844760, + "step": 7692 + }, + { + "epoch": 0.7521509581540868, + "grad_norm": 11.494614959553543, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 745941336, + "step": 7693 + }, + { + "epoch": 0.7521509581540868, + "loss": 0.056694332510232925, + "loss_ce": 0.004509271122515202, + "loss_iou": 0.3046875, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 745941336, + "step": 7693 + }, + { + "epoch": 0.7522487289792725, + "grad_norm": 12.846644808168621, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 746038944, + "step": 7694 + }, + { + "epoch": 0.7522487289792725, + "loss": 0.06307806074619293, + "loss_ce": 0.0056897555477917194, + "loss_iou": 0.28125, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 746038944, + "step": 7694 + }, + { + "epoch": 0.7523464998044583, + "grad_norm": 7.224290913736852, + "learning_rate": 5e-05, + "loss": 0.0695, + "num_input_tokens_seen": 746135932, + "step": 7695 + }, + { + "epoch": 0.7523464998044583, + "loss": 0.05239975452423096, + "loss_ce": 0.004208683036267757, + "loss_iou": 0.328125, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 746135932, + "step": 7695 + }, + { + "epoch": 0.7524442706296441, + "grad_norm": 6.57045138538437, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 746232972, + "step": 7696 + }, + { + "epoch": 0.7524442706296441, + "loss": 0.043180882930755615, + "loss_ce": 0.002745092613622546, + "loss_iou": 0.2421875, + "loss_num": 0.008056640625, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 746232972, + "step": 7696 + }, + { + "epoch": 0.7525420414548298, + "grad_norm": 6.1089756017173755, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 746330216, + "step": 7697 + }, + { + "epoch": 0.7525420414548298, + "loss": 0.07952988147735596, + "loss_ce": 0.004746549297124147, + "loss_iou": 0.330078125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 746330216, + "step": 7697 + }, + { + "epoch": 0.7526398122800156, + "grad_norm": 12.586358343827202, + "learning_rate": 5e-05, + "loss": 0.083, + "num_input_tokens_seen": 746427500, + "step": 7698 + }, + { + "epoch": 0.7526398122800156, + "loss": 0.08637679368257523, + "loss_ce": 0.0065733278170228004, + "loss_iou": 0.330078125, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 746427500, + "step": 7698 + }, + { + "epoch": 0.7527375831052014, + "grad_norm": 6.353820492198649, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 746524956, + "step": 7699 + }, + { + "epoch": 0.7527375831052014, + "loss": 0.07406532764434814, + "loss_ce": 0.005103228148072958, + "loss_iou": 0.3125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 746524956, + "step": 7699 + }, + { + "epoch": 0.7528353539303871, + "grad_norm": 15.412181580862052, + "learning_rate": 5e-05, + "loss": 0.092, + "num_input_tokens_seen": 746622316, + "step": 7700 + }, + { + "epoch": 0.7528353539303871, + "loss": 0.07401284575462341, + "loss_ce": 0.005775541067123413, + "loss_iou": 0.291015625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 746622316, + "step": 7700 + }, + { + "epoch": 0.752933124755573, + "grad_norm": 7.078851447590659, + "learning_rate": 5e-05, + "loss": 0.1063, + "num_input_tokens_seen": 746719348, + "step": 7701 + }, + { + "epoch": 0.752933124755573, + "loss": 0.16338638961315155, + "loss_ce": 0.006831219419836998, + "loss_iou": 0.166015625, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 746719348, + "step": 7701 + }, + { + "epoch": 0.7530308955807588, + "grad_norm": 5.348104808583409, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 746818276, + "step": 7702 + }, + { + "epoch": 0.7530308955807588, + "loss": 0.06529312580823898, + "loss_ce": 0.003006749087944627, + "loss_iou": 0.314453125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 746818276, + "step": 7702 + }, + { + "epoch": 0.7531286664059444, + "grad_norm": 9.422544756245237, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 746916296, + "step": 7703 + }, + { + "epoch": 0.7531286664059444, + "loss": 0.05505645275115967, + "loss_ce": 0.002444146666675806, + "loss_iou": 0.306640625, + "loss_num": 0.010498046875, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 746916296, + "step": 7703 + }, + { + "epoch": 0.7532264372311303, + "grad_norm": 10.300152878420672, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 747012820, + "step": 7704 + }, + { + "epoch": 0.7532264372311303, + "loss": 0.045504502952098846, + "loss_ce": 0.0019711796194314957, + "loss_iou": 0.2431640625, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 747012820, + "step": 7704 + }, + { + "epoch": 0.753324208056316, + "grad_norm": 12.055114966368865, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 747110332, + "step": 7705 + }, + { + "epoch": 0.753324208056316, + "loss": 0.09530313313007355, + "loss_ce": 0.0048948172479867935, + "loss_iou": 0.365234375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 747110332, + "step": 7705 + }, + { + "epoch": 0.7534219788815018, + "grad_norm": 12.645514288481051, + "learning_rate": 5e-05, + "loss": 0.1037, + "num_input_tokens_seen": 747206856, + "step": 7706 + }, + { + "epoch": 0.7534219788815018, + "loss": 0.12016621232032776, + "loss_ce": 0.010943794623017311, + "loss_iou": 0.384765625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 747206856, + "step": 7706 + }, + { + "epoch": 0.7535197497066876, + "grad_norm": 4.19733437585905, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 747304072, + "step": 7707 + }, + { + "epoch": 0.7535197497066876, + "loss": 0.07313916832208633, + "loss_ce": 0.01036450918763876, + "loss_iou": 0.267578125, + "loss_num": 0.01251220703125, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 747304072, + "step": 7707 + }, + { + "epoch": 0.7536175205318733, + "grad_norm": 4.9859538911147805, + "learning_rate": 5e-05, + "loss": 0.0521, + "num_input_tokens_seen": 747401212, + "step": 7708 + }, + { + "epoch": 0.7536175205318733, + "loss": 0.05725230649113655, + "loss_ce": 0.007890123873949051, + "loss_iou": 0.291015625, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 747401212, + "step": 7708 + }, + { + "epoch": 0.7537152913570591, + "grad_norm": 5.168416071672336, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 747498016, + "step": 7709 + }, + { + "epoch": 0.7537152913570591, + "loss": 0.1028105616569519, + "loss_ce": 0.00999134685844183, + "loss_iou": 0.255859375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 747498016, + "step": 7709 + }, + { + "epoch": 0.7538130621822449, + "grad_norm": 11.835534808180874, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 747594928, + "step": 7710 + }, + { + "epoch": 0.7538130621822449, + "loss": 0.06431721150875092, + "loss_ce": 0.004815563093870878, + "loss_iou": 0.244140625, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 747594928, + "step": 7710 + }, + { + "epoch": 0.7539108330074306, + "grad_norm": 12.085311163102741, + "learning_rate": 5e-05, + "loss": 0.0905, + "num_input_tokens_seen": 747691980, + "step": 7711 + }, + { + "epoch": 0.7539108330074306, + "loss": 0.060576099902391434, + "loss_ce": 0.0037599969655275345, + "loss_iou": 0.36328125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 747691980, + "step": 7711 + }, + { + "epoch": 0.7540086038326164, + "grad_norm": 4.177743499923026, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 747788924, + "step": 7712 + }, + { + "epoch": 0.7540086038326164, + "loss": 0.042206257581710815, + "loss_ce": 0.002350300084799528, + "loss_iou": 0.310546875, + "loss_num": 0.00799560546875, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 747788924, + "step": 7712 + }, + { + "epoch": 0.7541063746578021, + "grad_norm": 9.789628197861214, + "learning_rate": 5e-05, + "loss": 0.0534, + "num_input_tokens_seen": 747885488, + "step": 7713 + }, + { + "epoch": 0.7541063746578021, + "loss": 0.07243204861879349, + "loss_ce": 0.002974038477987051, + "loss_iou": 0.3515625, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 747885488, + "step": 7713 + }, + { + "epoch": 0.7542041454829879, + "grad_norm": 12.716431255406578, + "learning_rate": 5e-05, + "loss": 0.0944, + "num_input_tokens_seen": 747982492, + "step": 7714 + }, + { + "epoch": 0.7542041454829879, + "loss": 0.07758725434541702, + "loss_ce": 0.006305817514657974, + "loss_iou": 0.2890625, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 747982492, + "step": 7714 + }, + { + "epoch": 0.7543019163081737, + "grad_norm": 4.764040338316787, + "learning_rate": 5e-05, + "loss": 0.0435, + "num_input_tokens_seen": 748079872, + "step": 7715 + }, + { + "epoch": 0.7543019163081737, + "loss": 0.044076018035411835, + "loss_ce": 0.0059099746868014336, + "loss_iou": 0.2890625, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 748079872, + "step": 7715 + }, + { + "epoch": 0.7543996871333594, + "grad_norm": 3.595028942156441, + "learning_rate": 5e-05, + "loss": 0.0689, + "num_input_tokens_seen": 748176808, + "step": 7716 + }, + { + "epoch": 0.7543996871333594, + "loss": 0.04913530498743057, + "loss_ce": 0.0033741933293640614, + "loss_iou": 0.30859375, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 748176808, + "step": 7716 + }, + { + "epoch": 0.7544974579585452, + "grad_norm": 4.641232680256628, + "learning_rate": 5e-05, + "loss": 0.1142, + "num_input_tokens_seen": 748273876, + "step": 7717 + }, + { + "epoch": 0.7544974579585452, + "loss": 0.10269202291965485, + "loss_ce": 0.004471201449632645, + "loss_iou": 0.244140625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 748273876, + "step": 7717 + }, + { + "epoch": 0.754595228783731, + "grad_norm": 4.1072052455603, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 748371068, + "step": 7718 + }, + { + "epoch": 0.754595228783731, + "loss": 0.1007823795080185, + "loss_ce": 0.008924471214413643, + "loss_iou": 0.373046875, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 748371068, + "step": 7718 + }, + { + "epoch": 0.7546929996089167, + "grad_norm": 3.3144010288153467, + "learning_rate": 5e-05, + "loss": 0.107, + "num_input_tokens_seen": 748467740, + "step": 7719 + }, + { + "epoch": 0.7546929996089167, + "loss": 0.1064356192946434, + "loss_ce": 0.006460035219788551, + "loss_iou": 0.2431640625, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 748467740, + "step": 7719 + }, + { + "epoch": 0.7547907704341025, + "grad_norm": 11.071076100119846, + "learning_rate": 5e-05, + "loss": 0.078, + "num_input_tokens_seen": 748565208, + "step": 7720 + }, + { + "epoch": 0.7547907704341025, + "loss": 0.06741224229335785, + "loss_ce": 0.005759102292358875, + "loss_iou": 0.2158203125, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 748565208, + "step": 7720 + }, + { + "epoch": 0.7548885412592883, + "grad_norm": 17.076893872489116, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 748662456, + "step": 7721 + }, + { + "epoch": 0.7548885412592883, + "loss": 0.09003419429063797, + "loss_ce": 0.006858537904918194, + "loss_iou": 0.263671875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 748662456, + "step": 7721 + }, + { + "epoch": 0.754986312084474, + "grad_norm": 4.001093112661869, + "learning_rate": 5e-05, + "loss": 0.071, + "num_input_tokens_seen": 748759672, + "step": 7722 + }, + { + "epoch": 0.754986312084474, + "loss": 0.04083321988582611, + "loss_ce": 0.003662806935608387, + "loss_iou": 0.296875, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 748759672, + "step": 7722 + }, + { + "epoch": 0.7550840829096598, + "grad_norm": 5.000586337394182, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 748856476, + "step": 7723 + }, + { + "epoch": 0.7550840829096598, + "loss": 0.07527318596839905, + "loss_ce": 0.005113271530717611, + "loss_iou": 0.275390625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 748856476, + "step": 7723 + }, + { + "epoch": 0.7551818537348455, + "grad_norm": 9.98701064391095, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 748953820, + "step": 7724 + }, + { + "epoch": 0.7551818537348455, + "loss": 0.08607714623212814, + "loss_ce": 0.005182674154639244, + "loss_iou": 0.314453125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 748953820, + "step": 7724 + }, + { + "epoch": 0.7552796245600313, + "grad_norm": 23.290032418466225, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 749051252, + "step": 7725 + }, + { + "epoch": 0.7552796245600313, + "loss": 0.06717458367347717, + "loss_ce": 0.0048882062546908855, + "loss_iou": 0.310546875, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 749051252, + "step": 7725 + }, + { + "epoch": 0.7553773953852171, + "grad_norm": 7.0851925743843145, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 749147936, + "step": 7726 + }, + { + "epoch": 0.7553773953852171, + "loss": 0.07306960225105286, + "loss_ce": 0.006182702258229256, + "loss_iou": 0.30859375, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 749147936, + "step": 7726 + }, + { + "epoch": 0.7554751662104028, + "grad_norm": 5.018665100974916, + "learning_rate": 5e-05, + "loss": 0.0612, + "num_input_tokens_seen": 749244980, + "step": 7727 + }, + { + "epoch": 0.7554751662104028, + "loss": 0.06550651788711548, + "loss_ce": 0.006699149962514639, + "loss_iou": 0.310546875, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 749244980, + "step": 7727 + }, + { + "epoch": 0.7555729370355886, + "grad_norm": 6.248413706681601, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 749341780, + "step": 7728 + }, + { + "epoch": 0.7555729370355886, + "loss": 0.03928609937429428, + "loss_ce": 0.005258999299257994, + "loss_iou": 0.30859375, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 749341780, + "step": 7728 + }, + { + "epoch": 0.7556707078607744, + "grad_norm": 12.977373493441599, + "learning_rate": 5e-05, + "loss": 0.0695, + "num_input_tokens_seen": 749439352, + "step": 7729 + }, + { + "epoch": 0.7556707078607744, + "loss": 0.07975223660469055, + "loss_ce": 0.003755845595151186, + "loss_iou": 0.400390625, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 749439352, + "step": 7729 + }, + { + "epoch": 0.7557684786859601, + "grad_norm": 21.872357355053325, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 749536404, + "step": 7730 + }, + { + "epoch": 0.7557684786859601, + "loss": 0.10255976021289825, + "loss_ce": 0.006375983357429504, + "loss_iou": 0.28515625, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 749536404, + "step": 7730 + }, + { + "epoch": 0.7558662495111459, + "grad_norm": 16.44497232704066, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 749632592, + "step": 7731 + }, + { + "epoch": 0.7558662495111459, + "loss": 0.06783980876207352, + "loss_ce": 0.007178494706749916, + "loss_iou": 0.224609375, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 749632592, + "step": 7731 + }, + { + "epoch": 0.7559640203363316, + "grad_norm": 21.826824890584213, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 749729928, + "step": 7732 + }, + { + "epoch": 0.7559640203363316, + "loss": 0.07069951295852661, + "loss_ce": 0.005086724646389484, + "loss_iou": 0.2890625, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 749729928, + "step": 7732 + }, + { + "epoch": 0.7560617911615174, + "grad_norm": 22.1276479137054, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 749826356, + "step": 7733 + }, + { + "epoch": 0.7560617911615174, + "loss": 0.07996004074811935, + "loss_ce": 0.00986116286367178, + "loss_iou": 0.234375, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 749826356, + "step": 7733 + }, + { + "epoch": 0.7561595619867032, + "grad_norm": 1.7776830582229788, + "learning_rate": 5e-05, + "loss": 0.0615, + "num_input_tokens_seen": 749923144, + "step": 7734 + }, + { + "epoch": 0.7561595619867032, + "loss": 0.034759849309921265, + "loss_ce": 0.0036319184582680464, + "loss_iou": 0.26953125, + "loss_num": 0.0062255859375, + "loss_xval": 0.0311279296875, + "num_input_tokens_seen": 749923144, + "step": 7734 + }, + { + "epoch": 0.7562573328118889, + "grad_norm": 10.50109691250056, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 750020052, + "step": 7735 + }, + { + "epoch": 0.7562573328118889, + "loss": 0.07390405237674713, + "loss_ce": 0.007955566048622131, + "loss_iou": 0.296875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 750020052, + "step": 7735 + }, + { + "epoch": 0.7563551036370747, + "grad_norm": 3.653612310705264, + "learning_rate": 5e-05, + "loss": 0.075, + "num_input_tokens_seen": 750116712, + "step": 7736 + }, + { + "epoch": 0.7563551036370747, + "loss": 0.06873583048582077, + "loss_ce": 0.006865254603326321, + "loss_iou": 0.1953125, + "loss_num": 0.01239013671875, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 750116712, + "step": 7736 + }, + { + "epoch": 0.7564528744622605, + "grad_norm": 13.263060492046556, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 750214720, + "step": 7737 + }, + { + "epoch": 0.7564528744622605, + "loss": 0.0800514817237854, + "loss_ce": 0.008655606769025326, + "loss_iou": 0.2890625, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 750214720, + "step": 7737 + }, + { + "epoch": 0.7565506452874462, + "grad_norm": 20.006335314878516, + "learning_rate": 5e-05, + "loss": 0.1036, + "num_input_tokens_seen": 750311076, + "step": 7738 + }, + { + "epoch": 0.7565506452874462, + "loss": 0.0738217830657959, + "loss_ce": 0.005370849743485451, + "loss_iou": 0.2412109375, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 750311076, + "step": 7738 + }, + { + "epoch": 0.756648416112632, + "grad_norm": 5.55694766918573, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 750407880, + "step": 7739 + }, + { + "epoch": 0.756648416112632, + "loss": 0.10121766477823257, + "loss_ce": 0.002335943980142474, + "loss_iou": 0.26953125, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 750407880, + "step": 7739 + }, + { + "epoch": 0.7567461869378177, + "grad_norm": 16.007211518031536, + "learning_rate": 5e-05, + "loss": 0.0873, + "num_input_tokens_seen": 750505116, + "step": 7740 + }, + { + "epoch": 0.7567461869378177, + "loss": 0.08693977445363998, + "loss_ce": 0.0031079889740794897, + "loss_iou": 0.306640625, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 750505116, + "step": 7740 + }, + { + "epoch": 0.7568439577630035, + "grad_norm": 15.889122734614794, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 750602700, + "step": 7741 + }, + { + "epoch": 0.7568439577630035, + "loss": 0.05834072083234787, + "loss_ce": 0.0038821042980998755, + "loss_iou": 0.296875, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 750602700, + "step": 7741 + }, + { + "epoch": 0.7569417285881893, + "grad_norm": 3.4483160965918143, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 750700120, + "step": 7742 + }, + { + "epoch": 0.7569417285881893, + "loss": 0.05029015615582466, + "loss_ce": 0.002911616815254092, + "loss_iou": 0.33203125, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 750700120, + "step": 7742 + }, + { + "epoch": 0.757039499413375, + "grad_norm": 3.766484365866666, + "learning_rate": 5e-05, + "loss": 0.0708, + "num_input_tokens_seen": 750798144, + "step": 7743 + }, + { + "epoch": 0.757039499413375, + "loss": 0.0779176577925682, + "loss_ce": 0.006140312645584345, + "loss_iou": 0.37109375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 750798144, + "step": 7743 + }, + { + "epoch": 0.7571372702385608, + "grad_norm": 2.8272112564825256, + "learning_rate": 5e-05, + "loss": 0.075, + "num_input_tokens_seen": 750894600, + "step": 7744 + }, + { + "epoch": 0.7571372702385608, + "loss": 0.05502210929989815, + "loss_ce": 0.004164565354585648, + "loss_iou": 0.36328125, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 750894600, + "step": 7744 + }, + { + "epoch": 0.7572350410637466, + "grad_norm": 4.604362538035215, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 750990624, + "step": 7745 + }, + { + "epoch": 0.7572350410637466, + "loss": 0.09468014538288116, + "loss_ce": 0.008849459700286388, + "loss_iou": 0.25, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 750990624, + "step": 7745 + }, + { + "epoch": 0.7573328118889323, + "grad_norm": 2.8085087052686455, + "learning_rate": 5e-05, + "loss": 0.0763, + "num_input_tokens_seen": 751086932, + "step": 7746 + }, + { + "epoch": 0.7573328118889323, + "loss": 0.05821627378463745, + "loss_ce": 0.002597987651824951, + "loss_iou": 0.2490234375, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 751086932, + "step": 7746 + }, + { + "epoch": 0.7574305827141181, + "grad_norm": 5.476600310284342, + "learning_rate": 5e-05, + "loss": 0.0601, + "num_input_tokens_seen": 751184000, + "step": 7747 + }, + { + "epoch": 0.7574305827141181, + "loss": 0.03943675756454468, + "loss_ce": 0.005287585314363241, + "loss_iou": 0.224609375, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 751184000, + "step": 7747 + }, + { + "epoch": 0.7575283535393039, + "grad_norm": 11.241275329121985, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 751281560, + "step": 7748 + }, + { + "epoch": 0.7575283535393039, + "loss": 0.07240165770053864, + "loss_ce": 0.010641707107424736, + "loss_iou": 0.423828125, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 751281560, + "step": 7748 + }, + { + "epoch": 0.7576261243644896, + "grad_norm": 3.597765985185844, + "learning_rate": 5e-05, + "loss": 0.0504, + "num_input_tokens_seen": 751378576, + "step": 7749 + }, + { + "epoch": 0.7576261243644896, + "loss": 0.049337878823280334, + "loss_ce": 0.002432362176477909, + "loss_iou": 0.33203125, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 751378576, + "step": 7749 + }, + { + "epoch": 0.7577238951896754, + "grad_norm": 8.521394142165985, + "learning_rate": 5e-05, + "loss": 0.1006, + "num_input_tokens_seen": 751475692, + "step": 7750 + }, + { + "epoch": 0.7577238951896754, + "eval_seeclick_CIoU": 0.531792089343071, + "eval_seeclick_GIoU": 0.537290945649147, + "eval_seeclick_IoU": 0.5732867419719696, + "eval_seeclick_MAE_all": 0.07444386556744576, + "eval_seeclick_MAE_h": 0.03327969368547201, + "eval_seeclick_MAE_w": 0.11566329747438431, + "eval_seeclick_MAE_x": 0.12029353901743889, + "eval_seeclick_MAE_y": 0.02853892743587494, + "eval_seeclick_NUM_probability": 0.9999981820583344, + "eval_seeclick_inside_bbox": 0.7997159063816071, + "eval_seeclick_loss": 0.2632051408290863, + "eval_seeclick_loss_ce": 0.010288518853485584, + "eval_seeclick_loss_iou": 0.42718505859375, + "eval_seeclick_loss_num": 0.049243927001953125, + "eval_seeclick_loss_xval": 0.246124267578125, + "eval_seeclick_runtime": 81.6909, + "eval_seeclick_samples_per_second": 0.526, + "eval_seeclick_steps_per_second": 0.024, + "num_input_tokens_seen": 751475692, + "step": 7750 + }, + { + "epoch": 0.7577238951896754, + "eval_icons_CIoU": 0.7498865127563477, + "eval_icons_GIoU": 0.7501131296157837, + "eval_icons_IoU": 0.7679618000984192, + "eval_icons_MAE_all": 0.04373438283801079, + "eval_icons_MAE_h": 0.049957784824073315, + "eval_icons_MAE_w": 0.03835670463740826, + "eval_icons_MAE_x": 0.037116577848792076, + "eval_icons_MAE_y": 0.04950646311044693, + "eval_icons_NUM_probability": 0.9999973177909851, + "eval_icons_inside_bbox": 0.8784722089767456, + "eval_icons_loss": 0.14215999841690063, + "eval_icons_loss_ce": 1.549054189808885e-06, + "eval_icons_loss_iou": 0.39556884765625, + "eval_icons_loss_num": 0.030902862548828125, + "eval_icons_loss_xval": 0.15453338623046875, + "eval_icons_runtime": 86.0296, + "eval_icons_samples_per_second": 0.581, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 751475692, + "step": 7750 + }, + { + "epoch": 0.7577238951896754, + "eval_screenspot_CIoU": 0.31822611888249713, + "eval_screenspot_GIoU": 0.30518539249897003, + "eval_screenspot_IoU": 0.41036054491996765, + "eval_screenspot_MAE_all": 0.1538415551185608, + "eval_screenspot_MAE_h": 0.10873774935801823, + "eval_screenspot_MAE_w": 0.19965540369351706, + "eval_screenspot_MAE_x": 0.20177718003590903, + "eval_screenspot_MAE_y": 0.10519588987032573, + "eval_screenspot_NUM_probability": 0.9999940395355225, + "eval_screenspot_inside_bbox": 0.6137500007947286, + "eval_screenspot_loss": 0.5233551859855652, + "eval_screenspot_loss_ce": 0.016864531363050144, + "eval_screenspot_loss_iou": 0.3239339192708333, + "eval_screenspot_loss_num": 0.10280354817708333, + "eval_screenspot_loss_xval": 0.5141805013020834, + "eval_screenspot_runtime": 164.1226, + "eval_screenspot_samples_per_second": 0.542, + "eval_screenspot_steps_per_second": 0.018, + "num_input_tokens_seen": 751475692, + "step": 7750 + }, + { + "epoch": 0.7577238951896754, + "eval_compot_CIoU": 0.4988256096839905, + "eval_compot_GIoU": 0.48238520324230194, + "eval_compot_IoU": 0.5511042475700378, + "eval_compot_MAE_all": 0.08260860666632652, + "eval_compot_MAE_h": 0.07508129999041557, + "eval_compot_MAE_w": 0.09056146070361137, + "eval_compot_MAE_x": 0.08784065023064613, + "eval_compot_MAE_y": 0.07695100829005241, + "eval_compot_NUM_probability": 0.9991787374019623, + "eval_compot_inside_bbox": 0.7239583432674408, + "eval_compot_loss": 0.2690465748310089, + "eval_compot_loss_ce": 0.014028010424226522, + "eval_compot_loss_iou": 0.452880859375, + "eval_compot_loss_num": 0.046478271484375, + "eval_compot_loss_xval": 0.2323150634765625, + "eval_compot_runtime": 88.3238, + "eval_compot_samples_per_second": 0.566, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 751475692, + "step": 7750 + }, + { + "epoch": 0.7577238951896754, + "loss": 0.23750586807727814, + "loss_ce": 0.014574961736798286, + "loss_iou": 0.458984375, + "loss_num": 0.044677734375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 751475692, + "step": 7750 + }, + { + "epoch": 0.7578216660148611, + "grad_norm": 4.556520903740783, + "learning_rate": 5e-05, + "loss": 0.1129, + "num_input_tokens_seen": 751573516, + "step": 7751 + }, + { + "epoch": 0.7578216660148611, + "loss": 0.14788159728050232, + "loss_ce": 0.00627240352332592, + "loss_iou": 0.2431640625, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 751573516, + "step": 7751 + }, + { + "epoch": 0.7579194368400469, + "grad_norm": 9.447563033743794, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 751670940, + "step": 7752 + }, + { + "epoch": 0.7579194368400469, + "loss": 0.0704960823059082, + "loss_ce": 0.005630971863865852, + "loss_iou": 0.408203125, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 751670940, + "step": 7752 + }, + { + "epoch": 0.7580172076652327, + "grad_norm": 5.77710735986636, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 751767716, + "step": 7753 + }, + { + "epoch": 0.7580172076652327, + "loss": 0.057740241289138794, + "loss_ce": 0.006836923770606518, + "loss_iou": 0.212890625, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 751767716, + "step": 7753 + }, + { + "epoch": 0.7581149784904184, + "grad_norm": 5.971519556351113, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 751864064, + "step": 7754 + }, + { + "epoch": 0.7581149784904184, + "loss": 0.11217731982469559, + "loss_ce": 0.007769058924168348, + "loss_iou": 0.1796875, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 751864064, + "step": 7754 + }, + { + "epoch": 0.7582127493156042, + "grad_norm": 9.275551775579222, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 751959680, + "step": 7755 + }, + { + "epoch": 0.7582127493156042, + "loss": 0.057234205305576324, + "loss_ce": 0.0054277582094073296, + "loss_iou": 0.14453125, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 751959680, + "step": 7755 + }, + { + "epoch": 0.75831052014079, + "grad_norm": 12.207919221462754, + "learning_rate": 5e-05, + "loss": 0.0536, + "num_input_tokens_seen": 752056440, + "step": 7756 + }, + { + "epoch": 0.75831052014079, + "loss": 0.07127198576927185, + "loss_ce": 0.00775537546724081, + "loss_iou": 0.361328125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 752056440, + "step": 7756 + }, + { + "epoch": 0.7584082909659757, + "grad_norm": 26.73381644894532, + "learning_rate": 5e-05, + "loss": 0.0937, + "num_input_tokens_seen": 752153608, + "step": 7757 + }, + { + "epoch": 0.7584082909659757, + "loss": 0.08351129293441772, + "loss_ce": 0.005386297591030598, + "loss_iou": 0.251953125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 752153608, + "step": 7757 + }, + { + "epoch": 0.7585060617911615, + "grad_norm": 3.564825918092655, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 752250272, + "step": 7758 + }, + { + "epoch": 0.7585060617911615, + "loss": 0.095795638859272, + "loss_ce": 0.006714829243719578, + "loss_iou": 0.2578125, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 752250272, + "step": 7758 + }, + { + "epoch": 0.7586038326163472, + "grad_norm": 10.35922406876128, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 752347308, + "step": 7759 + }, + { + "epoch": 0.7586038326163472, + "loss": 0.04397634416818619, + "loss_ce": 0.0023503669071942568, + "loss_iou": 0.25390625, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 752347308, + "step": 7759 + }, + { + "epoch": 0.758701603441533, + "grad_norm": 8.120639461747688, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 752443528, + "step": 7760 + }, + { + "epoch": 0.758701603441533, + "loss": 0.05794954672455788, + "loss_ce": 0.0037503268104046583, + "loss_iou": 0.322265625, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 752443528, + "step": 7760 + }, + { + "epoch": 0.7587993742667188, + "grad_norm": 26.313722420449825, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 752541476, + "step": 7761 + }, + { + "epoch": 0.7587993742667188, + "loss": 0.06313133239746094, + "loss_ce": 0.004049309063702822, + "loss_iou": 0.328125, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 752541476, + "step": 7761 + }, + { + "epoch": 0.7588971450919045, + "grad_norm": 2.665294597161204, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 752638628, + "step": 7762 + }, + { + "epoch": 0.7588971450919045, + "loss": 0.04700542241334915, + "loss_ce": 0.007097968831658363, + "loss_iou": 0.3359375, + "loss_num": 0.00799560546875, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 752638628, + "step": 7762 + }, + { + "epoch": 0.7589949159170903, + "grad_norm": 6.187637194692834, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 752734580, + "step": 7763 + }, + { + "epoch": 0.7589949159170903, + "loss": 0.08516032993793488, + "loss_ce": 0.00395305547863245, + "loss_iou": 0.314453125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 752734580, + "step": 7763 + }, + { + "epoch": 0.7590926867422761, + "grad_norm": 6.246540137222045, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 752831720, + "step": 7764 + }, + { + "epoch": 0.7590926867422761, + "loss": 0.08849330246448517, + "loss_ce": 0.0021819639950990677, + "loss_iou": 0.259765625, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 752831720, + "step": 7764 + }, + { + "epoch": 0.7591904575674618, + "grad_norm": 9.572909931010463, + "learning_rate": 5e-05, + "loss": 0.0566, + "num_input_tokens_seen": 752929864, + "step": 7765 + }, + { + "epoch": 0.7591904575674618, + "loss": 0.05253260210156441, + "loss_ce": 0.0029453523457050323, + "loss_iou": 0.251953125, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 752929864, + "step": 7765 + }, + { + "epoch": 0.7592882283926476, + "grad_norm": 11.081894442466638, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 753027736, + "step": 7766 + }, + { + "epoch": 0.7592882283926476, + "loss": 0.03532681614160538, + "loss_ce": 0.0014980845153331757, + "loss_iou": 0.27734375, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 753027736, + "step": 7766 + }, + { + "epoch": 0.7593859992178335, + "grad_norm": 3.6799030956744145, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 753124660, + "step": 7767 + }, + { + "epoch": 0.7593859992178335, + "loss": 0.11206748336553574, + "loss_ce": 0.0066445088014006615, + "loss_iou": 0.28125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 753124660, + "step": 7767 + }, + { + "epoch": 0.7594837700430191, + "grad_norm": 7.366219514641002, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 753221124, + "step": 7768 + }, + { + "epoch": 0.7594837700430191, + "loss": 0.0535263791680336, + "loss_ce": 0.004225234966725111, + "loss_iou": 0.265625, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 753221124, + "step": 7768 + }, + { + "epoch": 0.759581540868205, + "grad_norm": 6.292469834168256, + "learning_rate": 5e-05, + "loss": 0.0745, + "num_input_tokens_seen": 753318540, + "step": 7769 + }, + { + "epoch": 0.759581540868205, + "loss": 0.06968696415424347, + "loss_ce": 0.0070725190453231335, + "loss_iou": 0.30078125, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 753318540, + "step": 7769 + }, + { + "epoch": 0.7596793116933906, + "grad_norm": 15.090867452867043, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 753415632, + "step": 7770 + }, + { + "epoch": 0.7596793116933906, + "loss": 0.058907460421323776, + "loss_ce": 0.0042428504675626755, + "loss_iou": 0.310546875, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 753415632, + "step": 7770 + }, + { + "epoch": 0.7597770825185765, + "grad_norm": 9.631967186124209, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 753512832, + "step": 7771 + }, + { + "epoch": 0.7597770825185765, + "loss": 0.07896498590707779, + "loss_ce": 0.007180013693869114, + "loss_iou": 0.2470703125, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 753512832, + "step": 7771 + }, + { + "epoch": 0.7598748533437623, + "grad_norm": 9.262661635045417, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 753610076, + "step": 7772 + }, + { + "epoch": 0.7598748533437623, + "loss": 0.07990896701812744, + "loss_ce": 0.0054613398388028145, + "loss_iou": 0.33203125, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 753610076, + "step": 7772 + }, + { + "epoch": 0.759972624168948, + "grad_norm": 4.821799653591122, + "learning_rate": 5e-05, + "loss": 0.0477, + "num_input_tokens_seen": 753706476, + "step": 7773 + }, + { + "epoch": 0.759972624168948, + "loss": 0.034539707005023956, + "loss_ce": 0.0036864366848021746, + "loss_iou": 0.201171875, + "loss_num": 0.00616455078125, + "loss_xval": 0.0308837890625, + "num_input_tokens_seen": 753706476, + "step": 7773 + }, + { + "epoch": 0.7600703949941338, + "grad_norm": 4.587474527517747, + "learning_rate": 5e-05, + "loss": 0.0863, + "num_input_tokens_seen": 753803764, + "step": 7774 + }, + { + "epoch": 0.7600703949941338, + "loss": 0.054242558777332306, + "loss_ce": 0.004292911384254694, + "loss_iou": 0.279296875, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 753803764, + "step": 7774 + }, + { + "epoch": 0.7601681658193196, + "grad_norm": 26.377801488312745, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 753901928, + "step": 7775 + }, + { + "epoch": 0.7601681658193196, + "loss": 0.05972710996866226, + "loss_ce": 0.007160576991736889, + "loss_iou": 0.38671875, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 753901928, + "step": 7775 + }, + { + "epoch": 0.7602659366445053, + "grad_norm": 10.984667918189915, + "learning_rate": 5e-05, + "loss": 0.0596, + "num_input_tokens_seen": 753998892, + "step": 7776 + }, + { + "epoch": 0.7602659366445053, + "loss": 0.07169822603464127, + "loss_ce": 0.0037432098761200905, + "loss_iou": 0.259765625, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 753998892, + "step": 7776 + }, + { + "epoch": 0.7603637074696911, + "grad_norm": 9.11922925020323, + "learning_rate": 5e-05, + "loss": 0.0965, + "num_input_tokens_seen": 754095828, + "step": 7777 + }, + { + "epoch": 0.7603637074696911, + "loss": 0.10150841623544693, + "loss_ce": 0.010810167528688908, + "loss_iou": 0.287109375, + "loss_num": 0.01806640625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 754095828, + "step": 7777 + }, + { + "epoch": 0.7604614782948768, + "grad_norm": 6.2223452151864045, + "learning_rate": 5e-05, + "loss": 0.0969, + "num_input_tokens_seen": 754192708, + "step": 7778 + }, + { + "epoch": 0.7604614782948768, + "loss": 0.07995863258838654, + "loss_ce": 0.005694102495908737, + "loss_iou": 0.328125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 754192708, + "step": 7778 + }, + { + "epoch": 0.7605592491200626, + "grad_norm": 11.156944246325299, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 754290344, + "step": 7779 + }, + { + "epoch": 0.7605592491200626, + "loss": 0.05678143352270126, + "loss_ce": 0.004834791645407677, + "loss_iou": 0.271484375, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 754290344, + "step": 7779 + }, + { + "epoch": 0.7606570199452484, + "grad_norm": 3.263989804084297, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 754386756, + "step": 7780 + }, + { + "epoch": 0.7606570199452484, + "loss": 0.054443396627902985, + "loss_ce": 0.005516092758625746, + "loss_iou": 0.2451171875, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 754386756, + "step": 7780 + }, + { + "epoch": 0.7607547907704341, + "grad_norm": 8.540264283999363, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 754483092, + "step": 7781 + }, + { + "epoch": 0.7607547907704341, + "loss": 0.044269535690546036, + "loss_ce": 0.0034255748614668846, + "loss_iou": 0.1982421875, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 754483092, + "step": 7781 + }, + { + "epoch": 0.7608525615956199, + "grad_norm": 7.329752215133203, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 754580460, + "step": 7782 + }, + { + "epoch": 0.7608525615956199, + "loss": 0.08649063855409622, + "loss_ce": 0.007097254507243633, + "loss_iou": 0.212890625, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 754580460, + "step": 7782 + }, + { + "epoch": 0.7609503324208057, + "grad_norm": 16.095089451504997, + "learning_rate": 5e-05, + "loss": 0.0985, + "num_input_tokens_seen": 754676824, + "step": 7783 + }, + { + "epoch": 0.7609503324208057, + "loss": 0.09540896117687225, + "loss_ce": 0.006553217768669128, + "loss_iou": 0.294921875, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 754676824, + "step": 7783 + }, + { + "epoch": 0.7610481032459914, + "grad_norm": 18.111793154651732, + "learning_rate": 5e-05, + "loss": 0.113, + "num_input_tokens_seen": 754772980, + "step": 7784 + }, + { + "epoch": 0.7610481032459914, + "loss": 0.11598512530326843, + "loss_ce": 0.004992698784917593, + "loss_iou": 0.279296875, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 754772980, + "step": 7784 + }, + { + "epoch": 0.7611458740711772, + "grad_norm": 12.2220840730372, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 754870372, + "step": 7785 + }, + { + "epoch": 0.7611458740711772, + "loss": 0.07974929362535477, + "loss_ce": 0.003882600227370858, + "loss_iou": 0.388671875, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 754870372, + "step": 7785 + }, + { + "epoch": 0.7612436448963629, + "grad_norm": 8.241010243001043, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 754967568, + "step": 7786 + }, + { + "epoch": 0.7612436448963629, + "loss": 0.07582499831914902, + "loss_ce": 0.0030253189615905285, + "loss_iou": 0.318359375, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 754967568, + "step": 7786 + }, + { + "epoch": 0.7613414157215487, + "grad_norm": 16.89548629589706, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 755064660, + "step": 7787 + }, + { + "epoch": 0.7613414157215487, + "loss": 0.0852467343211174, + "loss_ce": 0.00457351841032505, + "loss_iou": 0.294921875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 755064660, + "step": 7787 + }, + { + "epoch": 0.7614391865467345, + "grad_norm": 22.373892866683736, + "learning_rate": 5e-05, + "loss": 0.0994, + "num_input_tokens_seen": 755161880, + "step": 7788 + }, + { + "epoch": 0.7614391865467345, + "loss": 0.08043107390403748, + "loss_ce": 0.00422868225723505, + "loss_iou": 0.30859375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 755161880, + "step": 7788 + }, + { + "epoch": 0.7615369573719202, + "grad_norm": 10.323224238001451, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 755259000, + "step": 7789 + }, + { + "epoch": 0.7615369573719202, + "loss": 0.0794520452618599, + "loss_ce": 0.011786946095526218, + "loss_iou": 0.330078125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 755259000, + "step": 7789 + }, + { + "epoch": 0.761634728197106, + "grad_norm": 8.25081350179258, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 755356140, + "step": 7790 + }, + { + "epoch": 0.761634728197106, + "loss": 0.08077484369277954, + "loss_ce": 0.0034433004911988974, + "loss_iou": 0.35546875, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 755356140, + "step": 7790 + }, + { + "epoch": 0.7617324990222918, + "grad_norm": 23.08854883720788, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 755453372, + "step": 7791 + }, + { + "epoch": 0.7617324990222918, + "loss": 0.0947326272726059, + "loss_ce": 0.005392421502619982, + "loss_iou": 0.296875, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 755453372, + "step": 7791 + }, + { + "epoch": 0.7618302698474775, + "grad_norm": 13.790933040365283, + "learning_rate": 5e-05, + "loss": 0.06, + "num_input_tokens_seen": 755551072, + "step": 7792 + }, + { + "epoch": 0.7618302698474775, + "loss": 0.038816630840301514, + "loss_ce": 0.005861461162567139, + "loss_iou": 0.2392578125, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 755551072, + "step": 7792 + }, + { + "epoch": 0.7619280406726633, + "grad_norm": 3.8650721610034355, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 755649228, + "step": 7793 + }, + { + "epoch": 0.7619280406726633, + "loss": 0.0788869857788086, + "loss_ce": 0.005537987686693668, + "loss_iou": 0.263671875, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 755649228, + "step": 7793 + }, + { + "epoch": 0.7620258114978491, + "grad_norm": 4.189633234910922, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 755747216, + "step": 7794 + }, + { + "epoch": 0.7620258114978491, + "loss": 0.06162141263484955, + "loss_ce": 0.0032870620489120483, + "loss_iou": 0.40234375, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 755747216, + "step": 7794 + }, + { + "epoch": 0.7621235823230348, + "grad_norm": 3.5444918422421408, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 755844208, + "step": 7795 + }, + { + "epoch": 0.7621235823230348, + "loss": 0.09944051504135132, + "loss_ce": 0.0045155854895710945, + "loss_iou": 0.30078125, + "loss_num": 0.01904296875, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 755844208, + "step": 7795 + }, + { + "epoch": 0.7622213531482206, + "grad_norm": 5.784494739970807, + "learning_rate": 5e-05, + "loss": 0.0644, + "num_input_tokens_seen": 755940880, + "step": 7796 + }, + { + "epoch": 0.7622213531482206, + "loss": 0.06029318645596504, + "loss_ce": 0.006879796274006367, + "loss_iou": 0.30078125, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 755940880, + "step": 7796 + }, + { + "epoch": 0.7623191239734063, + "grad_norm": 2.8143472845511206, + "learning_rate": 5e-05, + "loss": 0.063, + "num_input_tokens_seen": 756038444, + "step": 7797 + }, + { + "epoch": 0.7623191239734063, + "loss": 0.06433413177728653, + "loss_ce": 0.00678179319947958, + "loss_iou": 0.271484375, + "loss_num": 0.011474609375, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 756038444, + "step": 7797 + }, + { + "epoch": 0.7624168947985921, + "grad_norm": 8.920442342249766, + "learning_rate": 5e-05, + "loss": 0.1114, + "num_input_tokens_seen": 756135460, + "step": 7798 + }, + { + "epoch": 0.7624168947985921, + "loss": 0.10170862078666687, + "loss_ce": 0.005730839911848307, + "loss_iou": 0.32421875, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 756135460, + "step": 7798 + }, + { + "epoch": 0.7625146656237779, + "grad_norm": 5.341297332015652, + "learning_rate": 5e-05, + "loss": 0.0645, + "num_input_tokens_seen": 756232424, + "step": 7799 + }, + { + "epoch": 0.7625146656237779, + "loss": 0.036088235676288605, + "loss_ce": 0.0025036409497261047, + "loss_iou": 0.326171875, + "loss_num": 0.0067138671875, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 756232424, + "step": 7799 + }, + { + "epoch": 0.7626124364489636, + "grad_norm": 5.634579181375473, + "learning_rate": 5e-05, + "loss": 0.0585, + "num_input_tokens_seen": 756329128, + "step": 7800 + }, + { + "epoch": 0.7626124364489636, + "loss": 0.048270851373672485, + "loss_ce": 0.0037151845172047615, + "loss_iou": 0.3125, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 756329128, + "step": 7800 + }, + { + "epoch": 0.7627102072741494, + "grad_norm": 4.6261101189534095, + "learning_rate": 5e-05, + "loss": 0.0626, + "num_input_tokens_seen": 756426044, + "step": 7801 + }, + { + "epoch": 0.7627102072741494, + "loss": 0.07609494030475616, + "loss_ce": 0.00520260538905859, + "loss_iou": 0.369140625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 756426044, + "step": 7801 + }, + { + "epoch": 0.7628079780993352, + "grad_norm": 11.14937763637644, + "learning_rate": 5e-05, + "loss": 0.0983, + "num_input_tokens_seen": 756523696, + "step": 7802 + }, + { + "epoch": 0.7628079780993352, + "loss": 0.10788670927286148, + "loss_ce": 0.004233760759234428, + "loss_iou": 0.291015625, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 756523696, + "step": 7802 + }, + { + "epoch": 0.7629057489245209, + "grad_norm": 8.095653042858958, + "learning_rate": 5e-05, + "loss": 0.0465, + "num_input_tokens_seen": 756619944, + "step": 7803 + }, + { + "epoch": 0.7629057489245209, + "loss": 0.03026546724140644, + "loss_ce": 0.005767482332885265, + "loss_iou": 0.263671875, + "loss_num": 0.004913330078125, + "loss_xval": 0.0245361328125, + "num_input_tokens_seen": 756619944, + "step": 7803 + }, + { + "epoch": 0.7630035197497067, + "grad_norm": 8.242913569323731, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 756716944, + "step": 7804 + }, + { + "epoch": 0.7630035197497067, + "loss": 0.06086748465895653, + "loss_ce": 0.006614860147237778, + "loss_iou": 0.32421875, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 756716944, + "step": 7804 + }, + { + "epoch": 0.7631012905748924, + "grad_norm": 4.605663598329197, + "learning_rate": 5e-05, + "loss": 0.0581, + "num_input_tokens_seen": 756814140, + "step": 7805 + }, + { + "epoch": 0.7631012905748924, + "loss": 0.07552133500576019, + "loss_ce": 0.0068796733394265175, + "loss_iou": 0.267578125, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 756814140, + "step": 7805 + }, + { + "epoch": 0.7631990614000782, + "grad_norm": 8.451406632441097, + "learning_rate": 5e-05, + "loss": 0.0556, + "num_input_tokens_seen": 756910652, + "step": 7806 + }, + { + "epoch": 0.7631990614000782, + "loss": 0.07736361026763916, + "loss_ce": 0.009553549811244011, + "loss_iou": 0.26953125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 756910652, + "step": 7806 + }, + { + "epoch": 0.763296832225264, + "grad_norm": 11.8371060737773, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 757008008, + "step": 7807 + }, + { + "epoch": 0.763296832225264, + "loss": 0.058618269860744476, + "loss_ce": 0.00341960322111845, + "loss_iou": 0.333984375, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 757008008, + "step": 7807 + }, + { + "epoch": 0.7633946030504497, + "grad_norm": 5.58808702623231, + "learning_rate": 5e-05, + "loss": 0.0933, + "num_input_tokens_seen": 757104480, + "step": 7808 + }, + { + "epoch": 0.7633946030504497, + "loss": 0.10638754069805145, + "loss_ce": 0.018313810229301453, + "loss_iou": 0.3046875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 757104480, + "step": 7808 + }, + { + "epoch": 0.7634923738756355, + "grad_norm": 30.08917865541938, + "learning_rate": 5e-05, + "loss": 0.0847, + "num_input_tokens_seen": 757200664, + "step": 7809 + }, + { + "epoch": 0.7634923738756355, + "loss": 0.11892616748809814, + "loss_ce": 0.0063239289447665215, + "loss_iou": 0.3515625, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 757200664, + "step": 7809 + }, + { + "epoch": 0.7635901447008213, + "grad_norm": 8.493420258261253, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 757297216, + "step": 7810 + }, + { + "epoch": 0.7635901447008213, + "loss": 0.11642193794250488, + "loss_ce": 0.009961359202861786, + "loss_iou": 0.2734375, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 757297216, + "step": 7810 + }, + { + "epoch": 0.763687915526007, + "grad_norm": 3.7390215054453684, + "learning_rate": 5e-05, + "loss": 0.0521, + "num_input_tokens_seen": 757393660, + "step": 7811 + }, + { + "epoch": 0.763687915526007, + "loss": 0.0384632870554924, + "loss_ce": 0.003886873135343194, + "loss_iou": 0.26171875, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 757393660, + "step": 7811 + }, + { + "epoch": 0.7637856863511928, + "grad_norm": 13.431424717590655, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 757491172, + "step": 7812 + }, + { + "epoch": 0.7637856863511928, + "loss": 0.07125987857580185, + "loss_ce": 0.005746269132941961, + "loss_iou": 0.3515625, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 757491172, + "step": 7812 + }, + { + "epoch": 0.7638834571763786, + "grad_norm": 3.9012065796080684, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 757588064, + "step": 7813 + }, + { + "epoch": 0.7638834571763786, + "loss": 0.04032725840806961, + "loss_ce": 0.006330675445497036, + "loss_iou": 0.251953125, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 757588064, + "step": 7813 + }, + { + "epoch": 0.7639812280015643, + "grad_norm": 8.030710285521383, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 757685424, + "step": 7814 + }, + { + "epoch": 0.7639812280015643, + "loss": 0.08845487236976624, + "loss_ce": 0.009294185787439346, + "loss_iou": 0.30859375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 757685424, + "step": 7814 + }, + { + "epoch": 0.7640789988267501, + "grad_norm": 14.179245787590874, + "learning_rate": 5e-05, + "loss": 0.0872, + "num_input_tokens_seen": 757781488, + "step": 7815 + }, + { + "epoch": 0.7640789988267501, + "loss": 0.08001095056533813, + "loss_ce": 0.004289210774004459, + "loss_iou": 0.2734375, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 757781488, + "step": 7815 + }, + { + "epoch": 0.7641767696519358, + "grad_norm": 9.506358847378424, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 757878800, + "step": 7816 + }, + { + "epoch": 0.7641767696519358, + "loss": 0.10665605962276459, + "loss_ce": 0.010815607383847237, + "loss_iou": 0.271484375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 757878800, + "step": 7816 + }, + { + "epoch": 0.7642745404771216, + "grad_norm": 4.923728872076227, + "learning_rate": 5e-05, + "loss": 0.0594, + "num_input_tokens_seen": 757976292, + "step": 7817 + }, + { + "epoch": 0.7642745404771216, + "loss": 0.04269033670425415, + "loss_ce": 0.00336843472905457, + "loss_iou": 0.34765625, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 757976292, + "step": 7817 + }, + { + "epoch": 0.7643723113023074, + "grad_norm": 2.7663200647873056, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 758074172, + "step": 7818 + }, + { + "epoch": 0.7643723113023074, + "loss": 0.05458050221204758, + "loss_ce": 0.002761652460321784, + "loss_iou": 0.296875, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 758074172, + "step": 7818 + }, + { + "epoch": 0.7644700821274931, + "grad_norm": 6.536130831974742, + "learning_rate": 5e-05, + "loss": 0.0502, + "num_input_tokens_seen": 758170900, + "step": 7819 + }, + { + "epoch": 0.7644700821274931, + "loss": 0.050212834030389786, + "loss_ce": 0.006893130484968424, + "loss_iou": 0.228515625, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 758170900, + "step": 7819 + }, + { + "epoch": 0.7645678529526789, + "grad_norm": 10.649151130079394, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 758268096, + "step": 7820 + }, + { + "epoch": 0.7645678529526789, + "loss": 0.06884325295686722, + "loss_ce": 0.00749529292806983, + "loss_iou": 0.341796875, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 758268096, + "step": 7820 + }, + { + "epoch": 0.7646656237778647, + "grad_norm": 23.840814890678324, + "learning_rate": 5e-05, + "loss": 0.0913, + "num_input_tokens_seen": 758365392, + "step": 7821 + }, + { + "epoch": 0.7646656237778647, + "loss": 0.12194833159446716, + "loss_ce": 0.0034485848154872656, + "loss_iou": 0.431640625, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 758365392, + "step": 7821 + }, + { + "epoch": 0.7647633946030504, + "grad_norm": 47.25745583952693, + "learning_rate": 5e-05, + "loss": 0.1066, + "num_input_tokens_seen": 758462132, + "step": 7822 + }, + { + "epoch": 0.7647633946030504, + "loss": 0.11925865709781647, + "loss_ce": 0.009044424630701542, + "loss_iou": 0.34375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 758462132, + "step": 7822 + }, + { + "epoch": 0.7648611654282362, + "grad_norm": 7.923368505051564, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 758559772, + "step": 7823 + }, + { + "epoch": 0.7648611654282362, + "loss": 0.07427573204040527, + "loss_ce": 0.007274385076016188, + "loss_iou": 0.345703125, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 758559772, + "step": 7823 + }, + { + "epoch": 0.7649589362534219, + "grad_norm": 6.520524038347336, + "learning_rate": 5e-05, + "loss": 0.0912, + "num_input_tokens_seen": 758656360, + "step": 7824 + }, + { + "epoch": 0.7649589362534219, + "loss": 0.12097784131765366, + "loss_ce": 0.004553280770778656, + "loss_iou": 0.35546875, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 758656360, + "step": 7824 + }, + { + "epoch": 0.7650567070786077, + "grad_norm": 9.444163065004506, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 758754868, + "step": 7825 + }, + { + "epoch": 0.7650567070786077, + "loss": 0.06584055721759796, + "loss_ce": 0.004210308194160461, + "loss_iou": 0.2578125, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 758754868, + "step": 7825 + }, + { + "epoch": 0.7651544779037935, + "grad_norm": 7.6450762064230355, + "learning_rate": 5e-05, + "loss": 0.0929, + "num_input_tokens_seen": 758851180, + "step": 7826 + }, + { + "epoch": 0.7651544779037935, + "loss": 0.10692526400089264, + "loss_ce": 0.0029213614761829376, + "loss_iou": 0.265625, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 758851180, + "step": 7826 + }, + { + "epoch": 0.7652522487289792, + "grad_norm": 9.080430390601968, + "learning_rate": 5e-05, + "loss": 0.0542, + "num_input_tokens_seen": 758948392, + "step": 7827 + }, + { + "epoch": 0.7652522487289792, + "loss": 0.07799024134874344, + "loss_ce": 0.00319166062399745, + "loss_iou": 0.291015625, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 758948392, + "step": 7827 + }, + { + "epoch": 0.765350019554165, + "grad_norm": 5.367380049633762, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 759046196, + "step": 7828 + }, + { + "epoch": 0.765350019554165, + "loss": 0.05456863343715668, + "loss_ce": 0.005587921477854252, + "loss_iou": 0.361328125, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 759046196, + "step": 7828 + }, + { + "epoch": 0.7654477903793508, + "grad_norm": 5.325103810233067, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 759142680, + "step": 7829 + }, + { + "epoch": 0.7654477903793508, + "loss": 0.1386958658695221, + "loss_ce": 0.010010872967541218, + "loss_iou": 0.3046875, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 759142680, + "step": 7829 + }, + { + "epoch": 0.7655455612045365, + "grad_norm": 6.909730647426354, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 759239572, + "step": 7830 + }, + { + "epoch": 0.7655455612045365, + "loss": 0.0813588872551918, + "loss_ce": 0.010336847975850105, + "loss_iou": 0.2373046875, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 759239572, + "step": 7830 + }, + { + "epoch": 0.7656433320297223, + "grad_norm": 3.7110944572031666, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 759337048, + "step": 7831 + }, + { + "epoch": 0.7656433320297223, + "loss": 0.06398236751556396, + "loss_ce": 0.006719943135976791, + "loss_iou": 0.318359375, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 759337048, + "step": 7831 + }, + { + "epoch": 0.765741102854908, + "grad_norm": 4.3012687316433595, + "learning_rate": 5e-05, + "loss": 0.0745, + "num_input_tokens_seen": 759433856, + "step": 7832 + }, + { + "epoch": 0.765741102854908, + "loss": 0.07190999388694763, + "loss_ce": 0.0023146583698689938, + "loss_iou": 0.263671875, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 759433856, + "step": 7832 + }, + { + "epoch": 0.7658388736800938, + "grad_norm": 3.5627373943022453, + "learning_rate": 5e-05, + "loss": 0.0651, + "num_input_tokens_seen": 759530460, + "step": 7833 + }, + { + "epoch": 0.7658388736800938, + "loss": 0.04496350884437561, + "loss_ce": 0.003543527564033866, + "loss_iou": 0.267578125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 759530460, + "step": 7833 + }, + { + "epoch": 0.7659366445052797, + "grad_norm": 7.983905793710435, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 759627516, + "step": 7834 + }, + { + "epoch": 0.7659366445052797, + "loss": 0.05668135732412338, + "loss_ce": 0.0020930380560457706, + "loss_iou": 0.296875, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 759627516, + "step": 7834 + }, + { + "epoch": 0.7660344153304653, + "grad_norm": 5.473485930127374, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 759724660, + "step": 7835 + }, + { + "epoch": 0.7660344153304653, + "loss": 0.04664541035890579, + "loss_ce": 0.009001979604363441, + "loss_iou": 0.279296875, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 759724660, + "step": 7835 + }, + { + "epoch": 0.7661321861556512, + "grad_norm": 10.476753138128284, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 759820676, + "step": 7836 + }, + { + "epoch": 0.7661321861556512, + "loss": 0.059792548418045044, + "loss_ce": 0.008545901626348495, + "loss_iou": 0.255859375, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 759820676, + "step": 7836 + }, + { + "epoch": 0.766229956980837, + "grad_norm": 9.46030597484635, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 759917580, + "step": 7837 + }, + { + "epoch": 0.766229956980837, + "loss": 0.05582650378346443, + "loss_ce": 0.004022915847599506, + "loss_iou": 0.2041015625, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 759917580, + "step": 7837 + }, + { + "epoch": 0.7663277278060227, + "grad_norm": 12.211829049219755, + "learning_rate": 5e-05, + "loss": 0.0932, + "num_input_tokens_seen": 760015280, + "step": 7838 + }, + { + "epoch": 0.7663277278060227, + "loss": 0.0926491767168045, + "loss_ce": 0.004987428430467844, + "loss_iou": 0.369140625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 760015280, + "step": 7838 + }, + { + "epoch": 0.7664254986312085, + "grad_norm": 3.318892975589334, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 760110408, + "step": 7839 + }, + { + "epoch": 0.7664254986312085, + "loss": 0.09454985707998276, + "loss_ce": 0.009848318062722683, + "loss_iou": 0.2265625, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 760110408, + "step": 7839 + }, + { + "epoch": 0.7665232694563943, + "grad_norm": 7.994061027919601, + "learning_rate": 5e-05, + "loss": 0.0861, + "num_input_tokens_seen": 760208648, + "step": 7840 + }, + { + "epoch": 0.7665232694563943, + "loss": 0.09030769765377045, + "loss_ce": 0.0020813823211938143, + "loss_iou": 0.275390625, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 760208648, + "step": 7840 + }, + { + "epoch": 0.76662104028158, + "grad_norm": 2.6569461935975345, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 760305556, + "step": 7841 + }, + { + "epoch": 0.76662104028158, + "loss": 0.06970514357089996, + "loss_ce": 0.0072204009629786015, + "loss_iou": 0.275390625, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 760305556, + "step": 7841 + }, + { + "epoch": 0.7667188111067658, + "grad_norm": 14.722102386733745, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 760402596, + "step": 7842 + }, + { + "epoch": 0.7667188111067658, + "loss": 0.050557754933834076, + "loss_ce": 0.0033699502237141132, + "loss_iou": 0.29296875, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 760402596, + "step": 7842 + }, + { + "epoch": 0.7668165819319515, + "grad_norm": 7.595761577639245, + "learning_rate": 5e-05, + "loss": 0.0677, + "num_input_tokens_seen": 760499108, + "step": 7843 + }, + { + "epoch": 0.7668165819319515, + "loss": 0.07211939990520477, + "loss_ce": 0.007265727035701275, + "loss_iou": 0.2314453125, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 760499108, + "step": 7843 + }, + { + "epoch": 0.7669143527571373, + "grad_norm": 8.762181329504731, + "learning_rate": 5e-05, + "loss": 0.0754, + "num_input_tokens_seen": 760596924, + "step": 7844 + }, + { + "epoch": 0.7669143527571373, + "loss": 0.05027533322572708, + "loss_ce": 0.0025000644382089376, + "loss_iou": 0.326171875, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 760596924, + "step": 7844 + }, + { + "epoch": 0.7670121235823231, + "grad_norm": 13.447021738316696, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 760691788, + "step": 7845 + }, + { + "epoch": 0.7670121235823231, + "loss": 0.09017978608608246, + "loss_ce": 0.013622626662254333, + "loss_iou": 0.06005859375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 760691788, + "step": 7845 + }, + { + "epoch": 0.7671098944075088, + "grad_norm": 40.77648702789133, + "learning_rate": 5e-05, + "loss": 0.0558, + "num_input_tokens_seen": 760789188, + "step": 7846 + }, + { + "epoch": 0.7671098944075088, + "loss": 0.06508944928646088, + "loss_ce": 0.0069382069632411, + "loss_iou": 0.259765625, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 760789188, + "step": 7846 + }, + { + "epoch": 0.7672076652326946, + "grad_norm": 4.189247378183934, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 760885364, + "step": 7847 + }, + { + "epoch": 0.7672076652326946, + "loss": 0.06584832817316055, + "loss_ce": 0.004645323846489191, + "loss_iou": 0.193359375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 760885364, + "step": 7847 + }, + { + "epoch": 0.7673054360578804, + "grad_norm": 2.586610968119662, + "learning_rate": 5e-05, + "loss": 0.064, + "num_input_tokens_seen": 760982524, + "step": 7848 + }, + { + "epoch": 0.7673054360578804, + "loss": 0.07761203497648239, + "loss_ce": 0.0011960151605308056, + "loss_iou": 0.234375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 760982524, + "step": 7848 + }, + { + "epoch": 0.7674032068830661, + "grad_norm": 10.14247730291866, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 761079724, + "step": 7849 + }, + { + "epoch": 0.7674032068830661, + "loss": 0.05978287011384964, + "loss_ce": 0.00405014306306839, + "loss_iou": 0.267578125, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 761079724, + "step": 7849 + }, + { + "epoch": 0.7675009777082519, + "grad_norm": 16.31464801351191, + "learning_rate": 5e-05, + "loss": 0.0942, + "num_input_tokens_seen": 761177296, + "step": 7850 + }, + { + "epoch": 0.7675009777082519, + "loss": 0.10231056064367294, + "loss_ce": 0.006332775577902794, + "loss_iou": 0.291015625, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 761177296, + "step": 7850 + }, + { + "epoch": 0.7675987485334376, + "grad_norm": 23.949797340428503, + "learning_rate": 5e-05, + "loss": 0.0863, + "num_input_tokens_seen": 761274400, + "step": 7851 + }, + { + "epoch": 0.7675987485334376, + "loss": 0.05027690529823303, + "loss_ce": 0.005118519067764282, + "loss_iou": 0.349609375, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 761274400, + "step": 7851 + }, + { + "epoch": 0.7676965193586234, + "grad_norm": 18.974218642940055, + "learning_rate": 5e-05, + "loss": 0.1013, + "num_input_tokens_seen": 761370576, + "step": 7852 + }, + { + "epoch": 0.7676965193586234, + "loss": 0.0842936709523201, + "loss_ce": 0.003162686713039875, + "loss_iou": 0.33984375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 761370576, + "step": 7852 + }, + { + "epoch": 0.7677942901838092, + "grad_norm": 11.255590480153792, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 761467624, + "step": 7853 + }, + { + "epoch": 0.7677942901838092, + "loss": 0.061169520020484924, + "loss_ce": 0.004101649392396212, + "loss_iou": 0.3359375, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 761467624, + "step": 7853 + }, + { + "epoch": 0.7678920610089949, + "grad_norm": 3.876579223306794, + "learning_rate": 5e-05, + "loss": 0.0681, + "num_input_tokens_seen": 761564556, + "step": 7854 + }, + { + "epoch": 0.7678920610089949, + "loss": 0.07300233095884323, + "loss_ce": 0.00898408517241478, + "loss_iou": 0.251953125, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 761564556, + "step": 7854 + }, + { + "epoch": 0.7679898318341807, + "grad_norm": 5.711821830889358, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 761661276, + "step": 7855 + }, + { + "epoch": 0.7679898318341807, + "loss": 0.07453365623950958, + "loss_ce": 0.004396634642034769, + "loss_iou": 0.2333984375, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 761661276, + "step": 7855 + }, + { + "epoch": 0.7680876026593665, + "grad_norm": 8.179804054437737, + "learning_rate": 5e-05, + "loss": 0.0769, + "num_input_tokens_seen": 761758376, + "step": 7856 + }, + { + "epoch": 0.7680876026593665, + "loss": 0.08233214914798737, + "loss_ce": 0.006091610994189978, + "loss_iou": 0.255859375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 761758376, + "step": 7856 + }, + { + "epoch": 0.7681853734845522, + "grad_norm": 12.014697930271483, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 761855356, + "step": 7857 + }, + { + "epoch": 0.7681853734845522, + "loss": 0.08095939457416534, + "loss_ce": 0.004398419056087732, + "loss_iou": 0.30078125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 761855356, + "step": 7857 + }, + { + "epoch": 0.768283144309738, + "grad_norm": 4.865362170157602, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 761951988, + "step": 7858 + }, + { + "epoch": 0.768283144309738, + "loss": 0.11831191182136536, + "loss_ce": 0.006846461910754442, + "loss_iou": 0.1689453125, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 761951988, + "step": 7858 + }, + { + "epoch": 0.7683809151349238, + "grad_norm": 2.1534937254207605, + "learning_rate": 5e-05, + "loss": 0.1089, + "num_input_tokens_seen": 762048828, + "step": 7859 + }, + { + "epoch": 0.7683809151349238, + "loss": 0.11654812842607498, + "loss_ce": 0.0038467124104499817, + "loss_iou": 0.234375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 762048828, + "step": 7859 + }, + { + "epoch": 0.7684786859601095, + "grad_norm": 8.670730819573555, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 762145876, + "step": 7860 + }, + { + "epoch": 0.7684786859601095, + "loss": 0.08059607446193695, + "loss_ce": 0.002570253564044833, + "loss_iou": 0.244140625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 762145876, + "step": 7860 + }, + { + "epoch": 0.7685764567852953, + "grad_norm": 6.6276420883314975, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 762242028, + "step": 7861 + }, + { + "epoch": 0.7685764567852953, + "loss": 0.06455755978822708, + "loss_ce": 0.00813055969774723, + "loss_iou": 0.11767578125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 762242028, + "step": 7861 + }, + { + "epoch": 0.768674227610481, + "grad_norm": 3.998507299504134, + "learning_rate": 5e-05, + "loss": 0.104, + "num_input_tokens_seen": 762338840, + "step": 7862 + }, + { + "epoch": 0.768674227610481, + "loss": 0.10378389805555344, + "loss_ce": 0.008904745802283287, + "loss_iou": 0.2216796875, + "loss_num": 0.01904296875, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 762338840, + "step": 7862 + }, + { + "epoch": 0.7687719984356668, + "grad_norm": 8.434649070143502, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 762435684, + "step": 7863 + }, + { + "epoch": 0.7687719984356668, + "loss": 0.06645967066287994, + "loss_ce": 0.0033798415679484606, + "loss_iou": 0.255859375, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 762435684, + "step": 7863 + }, + { + "epoch": 0.7688697692608526, + "grad_norm": 3.650665075170793, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 762533760, + "step": 7864 + }, + { + "epoch": 0.7688697692608526, + "loss": 0.04649798572063446, + "loss_ce": 0.00450580008327961, + "loss_iou": 0.291015625, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 762533760, + "step": 7864 + }, + { + "epoch": 0.7689675400860383, + "grad_norm": 5.366889842030369, + "learning_rate": 5e-05, + "loss": 0.0495, + "num_input_tokens_seen": 762630632, + "step": 7865 + }, + { + "epoch": 0.7689675400860383, + "loss": 0.07087782025337219, + "loss_ce": 0.009766376577317715, + "loss_iou": 0.298828125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 762630632, + "step": 7865 + }, + { + "epoch": 0.7690653109112241, + "grad_norm": 6.8574297795563615, + "learning_rate": 5e-05, + "loss": 0.078, + "num_input_tokens_seen": 762726800, + "step": 7866 + }, + { + "epoch": 0.7690653109112241, + "loss": 0.06388435512781143, + "loss_ce": 0.0054470086470246315, + "loss_iou": 0.259765625, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 762726800, + "step": 7866 + }, + { + "epoch": 0.7691630817364099, + "grad_norm": 26.09077951821359, + "learning_rate": 5e-05, + "loss": 0.0982, + "num_input_tokens_seen": 762823212, + "step": 7867 + }, + { + "epoch": 0.7691630817364099, + "loss": 0.10204453766345978, + "loss_ce": 0.005624252837151289, + "loss_iou": 0.322265625, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 762823212, + "step": 7867 + }, + { + "epoch": 0.7692608525615956, + "grad_norm": 8.435402624207567, + "learning_rate": 5e-05, + "loss": 0.1108, + "num_input_tokens_seen": 762920340, + "step": 7868 + }, + { + "epoch": 0.7692608525615956, + "loss": 0.14242109656333923, + "loss_ce": 0.0056718215346336365, + "loss_iou": 0.373046875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 762920340, + "step": 7868 + }, + { + "epoch": 0.7693586233867814, + "grad_norm": 7.805951206489631, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 763017240, + "step": 7869 + }, + { + "epoch": 0.7693586233867814, + "loss": 0.11737652122974396, + "loss_ce": 0.006017881911247969, + "loss_iou": 0.283203125, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 763017240, + "step": 7869 + }, + { + "epoch": 0.7694563942119671, + "grad_norm": 4.71409455295543, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 763114364, + "step": 7870 + }, + { + "epoch": 0.7694563942119671, + "loss": 0.08387526869773865, + "loss_ce": 0.008924096822738647, + "loss_iou": 0.3125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 763114364, + "step": 7870 + }, + { + "epoch": 0.7695541650371529, + "grad_norm": 3.6363218523698015, + "learning_rate": 5e-05, + "loss": 0.0377, + "num_input_tokens_seen": 763211628, + "step": 7871 + }, + { + "epoch": 0.7695541650371529, + "loss": 0.03875692933797836, + "loss_ce": 0.00413473742082715, + "loss_iou": 0.302734375, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 763211628, + "step": 7871 + }, + { + "epoch": 0.7696519358623387, + "grad_norm": 4.567149257239178, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 763309200, + "step": 7872 + }, + { + "epoch": 0.7696519358623387, + "loss": 0.04228351265192032, + "loss_ce": 0.005372501444071531, + "loss_iou": 0.2060546875, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 763309200, + "step": 7872 + }, + { + "epoch": 0.7697497066875244, + "grad_norm": 5.980251496346751, + "learning_rate": 5e-05, + "loss": 0.063, + "num_input_tokens_seen": 763406372, + "step": 7873 + }, + { + "epoch": 0.7697497066875244, + "loss": 0.06985098123550415, + "loss_ce": 0.003543913597241044, + "loss_iou": 0.212890625, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 763406372, + "step": 7873 + }, + { + "epoch": 0.7698474775127102, + "grad_norm": 10.278723964930252, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 763503620, + "step": 7874 + }, + { + "epoch": 0.7698474775127102, + "loss": 0.06662904471158981, + "loss_ce": 0.005807511508464813, + "loss_iou": 0.34765625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 763503620, + "step": 7874 + }, + { + "epoch": 0.769945248337896, + "grad_norm": 8.979555681938304, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 763600104, + "step": 7875 + }, + { + "epoch": 0.769945248337896, + "loss": 0.07009617984294891, + "loss_ce": 0.010853929445147514, + "loss_iou": 0.328125, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 763600104, + "step": 7875 + }, + { + "epoch": 0.7700430191630817, + "grad_norm": 19.21371307424473, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 763697952, + "step": 7876 + }, + { + "epoch": 0.7700430191630817, + "loss": 0.07972273230552673, + "loss_ce": 0.006015148479491472, + "loss_iou": 0.294921875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 763697952, + "step": 7876 + }, + { + "epoch": 0.7701407899882675, + "grad_norm": 7.820122364336553, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 763795016, + "step": 7877 + }, + { + "epoch": 0.7701407899882675, + "loss": 0.0849800854921341, + "loss_ce": 0.00615318538621068, + "loss_iou": 0.296875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 763795016, + "step": 7877 + }, + { + "epoch": 0.7702385608134532, + "grad_norm": 6.526019700095905, + "learning_rate": 5e-05, + "loss": 0.1024, + "num_input_tokens_seen": 763891900, + "step": 7878 + }, + { + "epoch": 0.7702385608134532, + "loss": 0.1002824455499649, + "loss_ce": 0.0040910374373197556, + "loss_iou": 0.32421875, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 763891900, + "step": 7878 + }, + { + "epoch": 0.770336331638639, + "grad_norm": 8.988507534789234, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 763989104, + "step": 7879 + }, + { + "epoch": 0.770336331638639, + "loss": 0.10394370555877686, + "loss_ce": 0.0026558623649179935, + "loss_iou": 0.201171875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 763989104, + "step": 7879 + }, + { + "epoch": 0.7704341024638248, + "grad_norm": 11.9396824199423, + "learning_rate": 5e-05, + "loss": 0.0905, + "num_input_tokens_seen": 764086196, + "step": 7880 + }, + { + "epoch": 0.7704341024638248, + "loss": 0.09388625621795654, + "loss_ce": 0.0026692168321460485, + "loss_iou": 0.28125, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 764086196, + "step": 7880 + }, + { + "epoch": 0.7705318732890105, + "grad_norm": 15.166908953408312, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 764183672, + "step": 7881 + }, + { + "epoch": 0.7705318732890105, + "loss": 0.140880286693573, + "loss_ce": 0.006190965883433819, + "loss_iou": 0.37890625, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 764183672, + "step": 7881 + }, + { + "epoch": 0.7706296441141963, + "grad_norm": 10.864182851908103, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 764280492, + "step": 7882 + }, + { + "epoch": 0.7706296441141963, + "loss": 0.09422764182090759, + "loss_ce": 0.006153911352157593, + "loss_iou": 0.314453125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 764280492, + "step": 7882 + }, + { + "epoch": 0.7707274149393821, + "grad_norm": 16.798798721507374, + "learning_rate": 5e-05, + "loss": 0.1257, + "num_input_tokens_seen": 764377460, + "step": 7883 + }, + { + "epoch": 0.7707274149393821, + "loss": 0.08836957812309265, + "loss_ce": 0.002523633651435375, + "loss_iou": 0.291015625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 764377460, + "step": 7883 + }, + { + "epoch": 0.7708251857645678, + "grad_norm": 12.210044234406492, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 764474972, + "step": 7884 + }, + { + "epoch": 0.7708251857645678, + "loss": 0.08798538148403168, + "loss_ce": 0.0031465187203139067, + "loss_iou": 0.251953125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 764474972, + "step": 7884 + }, + { + "epoch": 0.7709229565897536, + "grad_norm": 36.06575925614177, + "learning_rate": 5e-05, + "loss": 0.077, + "num_input_tokens_seen": 764572472, + "step": 7885 + }, + { + "epoch": 0.7709229565897536, + "loss": 0.10446059703826904, + "loss_ce": 0.005278461612761021, + "loss_iou": 0.353515625, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 764572472, + "step": 7885 + }, + { + "epoch": 0.7710207274149394, + "grad_norm": 6.100803901683546, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 764669572, + "step": 7886 + }, + { + "epoch": 0.7710207274149394, + "loss": 0.07101858407258987, + "loss_ce": 0.006618865765631199, + "loss_iou": 0.37109375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 764669572, + "step": 7886 + }, + { + "epoch": 0.7711184982401251, + "grad_norm": 3.9212885854602937, + "learning_rate": 5e-05, + "loss": 0.1045, + "num_input_tokens_seen": 764766048, + "step": 7887 + }, + { + "epoch": 0.7711184982401251, + "loss": 0.12255679816007614, + "loss_ce": 0.0045453207567334175, + "loss_iou": 0.17578125, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 764766048, + "step": 7887 + }, + { + "epoch": 0.7712162690653109, + "grad_norm": 5.2692508934224636, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 764861972, + "step": 7888 + }, + { + "epoch": 0.7712162690653109, + "loss": 0.15065550804138184, + "loss_ce": 0.011937865987420082, + "loss_iou": 0.25, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 764861972, + "step": 7888 + }, + { + "epoch": 0.7713140398904966, + "grad_norm": 5.495300476021103, + "learning_rate": 5e-05, + "loss": 0.0846, + "num_input_tokens_seen": 764958864, + "step": 7889 + }, + { + "epoch": 0.7713140398904966, + "loss": 0.07027757167816162, + "loss_ce": 0.005641337018460035, + "loss_iou": 0.2734375, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 764958864, + "step": 7889 + }, + { + "epoch": 0.7714118107156824, + "grad_norm": 8.720905478547978, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 765055548, + "step": 7890 + }, + { + "epoch": 0.7714118107156824, + "loss": 0.08680543303489685, + "loss_ce": 0.007764911279082298, + "loss_iou": 0.1826171875, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 765055548, + "step": 7890 + }, + { + "epoch": 0.7715095815408682, + "grad_norm": 6.619067395665734, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 765152232, + "step": 7891 + }, + { + "epoch": 0.7715095815408682, + "loss": 0.05042325705289841, + "loss_ce": 0.0048090131022036076, + "loss_iou": 0.169921875, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 765152232, + "step": 7891 + }, + { + "epoch": 0.7716073523660539, + "grad_norm": 18.811178819436286, + "learning_rate": 5e-05, + "loss": 0.092, + "num_input_tokens_seen": 765249124, + "step": 7892 + }, + { + "epoch": 0.7716073523660539, + "loss": 0.09057313203811646, + "loss_ce": 0.006283576600253582, + "loss_iou": 0.328125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 765249124, + "step": 7892 + }, + { + "epoch": 0.7717051231912397, + "grad_norm": 8.37089243087936, + "learning_rate": 5e-05, + "loss": 0.058, + "num_input_tokens_seen": 765345780, + "step": 7893 + }, + { + "epoch": 0.7717051231912397, + "loss": 0.06893832981586456, + "loss_ce": 0.008421971462666988, + "loss_iou": 0.314453125, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 765345780, + "step": 7893 + }, + { + "epoch": 0.7718028940164255, + "grad_norm": 5.676143251839937, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 765442996, + "step": 7894 + }, + { + "epoch": 0.7718028940164255, + "loss": 0.08549414575099945, + "loss_ce": 0.005492313764989376, + "loss_iou": 0.259765625, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 765442996, + "step": 7894 + }, + { + "epoch": 0.7719006648416112, + "grad_norm": 4.26953473232478, + "learning_rate": 5e-05, + "loss": 0.0873, + "num_input_tokens_seen": 765539804, + "step": 7895 + }, + { + "epoch": 0.7719006648416112, + "loss": 0.09866946190595627, + "loss_ce": 0.008779932744801044, + "loss_iou": 0.255859375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 765539804, + "step": 7895 + }, + { + "epoch": 0.771998435666797, + "grad_norm": 4.432537233646274, + "learning_rate": 5e-05, + "loss": 0.1041, + "num_input_tokens_seen": 765636732, + "step": 7896 + }, + { + "epoch": 0.771998435666797, + "loss": 0.09321723133325577, + "loss_ce": 0.005860665813088417, + "loss_iou": 0.275390625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 765636732, + "step": 7896 + }, + { + "epoch": 0.7720962064919827, + "grad_norm": 41.238238294964695, + "learning_rate": 5e-05, + "loss": 0.1086, + "num_input_tokens_seen": 765733484, + "step": 7897 + }, + { + "epoch": 0.7720962064919827, + "loss": 0.1055351048707962, + "loss_ce": 0.00502547062933445, + "loss_iou": 0.3359375, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 765733484, + "step": 7897 + }, + { + "epoch": 0.7721939773171685, + "grad_norm": 34.34596588365871, + "learning_rate": 5e-05, + "loss": 0.1197, + "num_input_tokens_seen": 765830612, + "step": 7898 + }, + { + "epoch": 0.7721939773171685, + "loss": 0.1155005544424057, + "loss_ce": 0.008627993986010551, + "loss_iou": 0.27734375, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 765830612, + "step": 7898 + }, + { + "epoch": 0.7722917481423544, + "grad_norm": 4.689031945545169, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 765927832, + "step": 7899 + }, + { + "epoch": 0.7722917481423544, + "loss": 0.09566329419612885, + "loss_ce": 0.004034267272800207, + "loss_iou": 0.322265625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 765927832, + "step": 7899 + }, + { + "epoch": 0.77238951896754, + "grad_norm": 4.4309078934378965, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 766024416, + "step": 7900 + }, + { + "epoch": 0.77238951896754, + "loss": 0.05622432380914688, + "loss_ce": 0.0041002994403243065, + "loss_iou": 0.203125, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 766024416, + "step": 7900 + }, + { + "epoch": 0.7724872897927259, + "grad_norm": 12.705346351345154, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 766121532, + "step": 7901 + }, + { + "epoch": 0.7724872897927259, + "loss": 0.09909063577651978, + "loss_ce": 0.006805473938584328, + "loss_iou": 0.349609375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 766121532, + "step": 7901 + }, + { + "epoch": 0.7725850606179117, + "grad_norm": 9.403318435996045, + "learning_rate": 5e-05, + "loss": 0.0626, + "num_input_tokens_seen": 766218712, + "step": 7902 + }, + { + "epoch": 0.7725850606179117, + "loss": 0.06210957467556, + "loss_ce": 0.005484209395945072, + "loss_iou": 0.310546875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 766218712, + "step": 7902 + }, + { + "epoch": 0.7726828314430974, + "grad_norm": 6.217569212855068, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 766316608, + "step": 7903 + }, + { + "epoch": 0.7726828314430974, + "loss": 0.07751555740833282, + "loss_ce": 0.0041207908652722836, + "loss_iou": 0.30078125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 766316608, + "step": 7903 + }, + { + "epoch": 0.7727806022682832, + "grad_norm": 5.72263974801701, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 766413536, + "step": 7904 + }, + { + "epoch": 0.7727806022682832, + "loss": 0.09113671630620956, + "loss_ce": 0.007091306149959564, + "loss_iou": 0.279296875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 766413536, + "step": 7904 + }, + { + "epoch": 0.772878373093469, + "grad_norm": 5.867400872213161, + "learning_rate": 5e-05, + "loss": 0.103, + "num_input_tokens_seen": 766509776, + "step": 7905 + }, + { + "epoch": 0.772878373093469, + "loss": 0.06721566617488861, + "loss_ce": 0.004006125032901764, + "loss_iou": 0.310546875, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 766509776, + "step": 7905 + }, + { + "epoch": 0.7729761439186547, + "grad_norm": 9.394464613320958, + "learning_rate": 5e-05, + "loss": 0.1111, + "num_input_tokens_seen": 766606236, + "step": 7906 + }, + { + "epoch": 0.7729761439186547, + "loss": 0.09840632975101471, + "loss_ce": 0.004915736149996519, + "loss_iou": 0.306640625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 766606236, + "step": 7906 + }, + { + "epoch": 0.7730739147438405, + "grad_norm": 4.0067639591921935, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 766703592, + "step": 7907 + }, + { + "epoch": 0.7730739147438405, + "loss": 0.11051112413406372, + "loss_ce": 0.006263080984354019, + "loss_iou": 0.439453125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 766703592, + "step": 7907 + }, + { + "epoch": 0.7731716855690262, + "grad_norm": 5.198262110310329, + "learning_rate": 5e-05, + "loss": 0.0942, + "num_input_tokens_seen": 766800664, + "step": 7908 + }, + { + "epoch": 0.7731716855690262, + "loss": 0.1332281231880188, + "loss_ce": 0.011462979018688202, + "loss_iou": 0.310546875, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 766800664, + "step": 7908 + }, + { + "epoch": 0.773269456394212, + "grad_norm": 6.783266690393707, + "learning_rate": 5e-05, + "loss": 0.096, + "num_input_tokens_seen": 766896976, + "step": 7909 + }, + { + "epoch": 0.773269456394212, + "loss": 0.07853548228740692, + "loss_ce": 0.003496570745483041, + "loss_iou": 0.265625, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 766896976, + "step": 7909 + }, + { + "epoch": 0.7733672272193978, + "grad_norm": 4.192522497419933, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 766993648, + "step": 7910 + }, + { + "epoch": 0.7733672272193978, + "loss": 0.08264309167861938, + "loss_ce": 0.006944239139556885, + "loss_iou": 0.310546875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 766993648, + "step": 7910 + }, + { + "epoch": 0.7734649980445835, + "grad_norm": 5.069063119427009, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 767090740, + "step": 7911 + }, + { + "epoch": 0.7734649980445835, + "loss": 0.10673888772726059, + "loss_ce": 0.0016210913890972733, + "loss_iou": 0.296875, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 767090740, + "step": 7911 + }, + { + "epoch": 0.7735627688697693, + "grad_norm": 7.151451964419733, + "learning_rate": 5e-05, + "loss": 0.115, + "num_input_tokens_seen": 767187116, + "step": 7912 + }, + { + "epoch": 0.7735627688697693, + "loss": 0.15037693083286285, + "loss_ce": 0.010423317551612854, + "loss_iou": 0.248046875, + "loss_num": 0.028076171875, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 767187116, + "step": 7912 + }, + { + "epoch": 0.7736605396949551, + "grad_norm": 12.161099978188878, + "learning_rate": 5e-05, + "loss": 0.0578, + "num_input_tokens_seen": 767283884, + "step": 7913 + }, + { + "epoch": 0.7736605396949551, + "loss": 0.08079525828361511, + "loss_ce": 0.0054473611526191235, + "loss_iou": 0.2890625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 767283884, + "step": 7913 + }, + { + "epoch": 0.7737583105201408, + "grad_norm": 7.770609480555853, + "learning_rate": 5e-05, + "loss": 0.0632, + "num_input_tokens_seen": 767381464, + "step": 7914 + }, + { + "epoch": 0.7737583105201408, + "loss": 0.06363633275032043, + "loss_ce": 0.004828964360058308, + "loss_iou": 0.3125, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 767381464, + "step": 7914 + }, + { + "epoch": 0.7738560813453266, + "grad_norm": 4.19381100637756, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 767477908, + "step": 7915 + }, + { + "epoch": 0.7738560813453266, + "loss": 0.04134996980428696, + "loss_ce": 0.006651484407484531, + "loss_iou": 0.2109375, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 767477908, + "step": 7915 + }, + { + "epoch": 0.7739538521705123, + "grad_norm": 19.378178322065178, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 767574520, + "step": 7916 + }, + { + "epoch": 0.7739538521705123, + "loss": 0.095807746052742, + "loss_ce": 0.0036408379673957825, + "loss_iou": 0.259765625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 767574520, + "step": 7916 + }, + { + "epoch": 0.7740516229956981, + "grad_norm": 2.963890388683044, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 767671080, + "step": 7917 + }, + { + "epoch": 0.7740516229956981, + "loss": 0.09882882237434387, + "loss_ce": 0.00845100823789835, + "loss_iou": 0.25390625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 767671080, + "step": 7917 + }, + { + "epoch": 0.7741493938208839, + "grad_norm": 3.883113943568484, + "learning_rate": 5e-05, + "loss": 0.1111, + "num_input_tokens_seen": 767768484, + "step": 7918 + }, + { + "epoch": 0.7741493938208839, + "loss": 0.1150316372513771, + "loss_ce": 0.005412490107119083, + "loss_iou": 0.265625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 767768484, + "step": 7918 + }, + { + "epoch": 0.7742471646460696, + "grad_norm": 23.782330511707823, + "learning_rate": 5e-05, + "loss": 0.1199, + "num_input_tokens_seen": 767864788, + "step": 7919 + }, + { + "epoch": 0.7742471646460696, + "loss": 0.14182926714420319, + "loss_ce": 0.004927407018840313, + "loss_iou": 0.265625, + "loss_num": 0.0274658203125, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 767864788, + "step": 7919 + }, + { + "epoch": 0.7743449354712554, + "grad_norm": 42.3111069598553, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 767961692, + "step": 7920 + }, + { + "epoch": 0.7743449354712554, + "loss": 0.08066970109939575, + "loss_ce": 0.0052989087998867035, + "loss_iou": 0.21484375, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 767961692, + "step": 7920 + }, + { + "epoch": 0.7744427062964412, + "grad_norm": 13.330538680683757, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 768058132, + "step": 7921 + }, + { + "epoch": 0.7744427062964412, + "loss": 0.06829914450645447, + "loss_ce": 0.0051430147141218185, + "loss_iou": 0.2890625, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 768058132, + "step": 7921 + }, + { + "epoch": 0.7745404771216269, + "grad_norm": 3.8079233557900625, + "learning_rate": 5e-05, + "loss": 0.0559, + "num_input_tokens_seen": 768156464, + "step": 7922 + }, + { + "epoch": 0.7745404771216269, + "loss": 0.0607854500412941, + "loss_ce": 0.003427661955356598, + "loss_iou": 0.259765625, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 768156464, + "step": 7922 + }, + { + "epoch": 0.7746382479468127, + "grad_norm": 1.8456458949208405, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 768252504, + "step": 7923 + }, + { + "epoch": 0.7746382479468127, + "loss": 0.07357507199048996, + "loss_ce": 0.005402617622166872, + "loss_iou": 0.267578125, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 768252504, + "step": 7923 + }, + { + "epoch": 0.7747360187719984, + "grad_norm": 17.50332264810059, + "learning_rate": 5e-05, + "loss": 0.1284, + "num_input_tokens_seen": 768349620, + "step": 7924 + }, + { + "epoch": 0.7747360187719984, + "loss": 0.07091590762138367, + "loss_ce": 0.002571794204413891, + "loss_iou": 0.265625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 768349620, + "step": 7924 + }, + { + "epoch": 0.7748337895971842, + "grad_norm": 6.9981248380062615, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 768446616, + "step": 7925 + }, + { + "epoch": 0.7748337895971842, + "loss": 0.11850136518478394, + "loss_ce": 0.007325832732021809, + "loss_iou": 0.24609375, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 768446616, + "step": 7925 + }, + { + "epoch": 0.77493156042237, + "grad_norm": 4.143163038963988, + "learning_rate": 5e-05, + "loss": 0.0773, + "num_input_tokens_seen": 768544288, + "step": 7926 + }, + { + "epoch": 0.77493156042237, + "loss": 0.090825155377388, + "loss_ce": 0.003651694394648075, + "loss_iou": 0.244140625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 768544288, + "step": 7926 + }, + { + "epoch": 0.7750293312475557, + "grad_norm": 1.4827063844625699, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 768640092, + "step": 7927 + }, + { + "epoch": 0.7750293312475557, + "loss": 0.07096060365438461, + "loss_ce": 0.005935271270573139, + "loss_iou": 0.1708984375, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 768640092, + "step": 7927 + }, + { + "epoch": 0.7751271020727415, + "grad_norm": 6.3947953269083895, + "learning_rate": 5e-05, + "loss": 0.0402, + "num_input_tokens_seen": 768737388, + "step": 7928 + }, + { + "epoch": 0.7751271020727415, + "loss": 0.049469396471977234, + "loss_ce": 0.0034794090315699577, + "loss_iou": 0.275390625, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 768737388, + "step": 7928 + }, + { + "epoch": 0.7752248728979273, + "grad_norm": 7.432321058122924, + "learning_rate": 5e-05, + "loss": 0.0532, + "num_input_tokens_seen": 768834548, + "step": 7929 + }, + { + "epoch": 0.7752248728979273, + "loss": 0.055538054555654526, + "loss_ce": 0.00600039679557085, + "loss_iou": 0.3203125, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 768834548, + "step": 7929 + }, + { + "epoch": 0.775322643723113, + "grad_norm": 26.39150259615497, + "learning_rate": 5e-05, + "loss": 0.1032, + "num_input_tokens_seen": 768931680, + "step": 7930 + }, + { + "epoch": 0.775322643723113, + "loss": 0.08642973005771637, + "loss_ce": 0.002109663328155875, + "loss_iou": 0.287109375, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 768931680, + "step": 7930 + }, + { + "epoch": 0.7754204145482988, + "grad_norm": 12.53019669035113, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 769028528, + "step": 7931 + }, + { + "epoch": 0.7754204145482988, + "loss": 0.05562898889183998, + "loss_ce": 0.0058853356167674065, + "loss_iou": 0.359375, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 769028528, + "step": 7931 + }, + { + "epoch": 0.7755181853734846, + "grad_norm": 3.6837412007426074, + "learning_rate": 5e-05, + "loss": 0.0691, + "num_input_tokens_seen": 769125396, + "step": 7932 + }, + { + "epoch": 0.7755181853734846, + "loss": 0.06796275824308395, + "loss_ce": 0.0033646693918854, + "loss_iou": 0.298828125, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 769125396, + "step": 7932 + }, + { + "epoch": 0.7756159561986703, + "grad_norm": 16.663937158827874, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 769222144, + "step": 7933 + }, + { + "epoch": 0.7756159561986703, + "loss": 0.04199783131480217, + "loss_ce": 0.0031794728711247444, + "loss_iou": 0.34765625, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 769222144, + "step": 7933 + }, + { + "epoch": 0.7757137270238561, + "grad_norm": 21.1906470247296, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 769319764, + "step": 7934 + }, + { + "epoch": 0.7757137270238561, + "loss": 0.056569717824459076, + "loss_ce": 0.008557934314012527, + "loss_iou": 0.30859375, + "loss_num": 0.00958251953125, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 769319764, + "step": 7934 + }, + { + "epoch": 0.7758114978490418, + "grad_norm": 10.518657998824501, + "learning_rate": 5e-05, + "loss": 0.0646, + "num_input_tokens_seen": 769416896, + "step": 7935 + }, + { + "epoch": 0.7758114978490418, + "loss": 0.07846817374229431, + "loss_ce": 0.00414261594414711, + "loss_iou": 0.248046875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 769416896, + "step": 7935 + }, + { + "epoch": 0.7759092686742276, + "grad_norm": 11.284363144934142, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 769514588, + "step": 7936 + }, + { + "epoch": 0.7759092686742276, + "loss": 0.056380074471235275, + "loss_ce": 0.006178657524287701, + "loss_iou": 0.2890625, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 769514588, + "step": 7936 + }, + { + "epoch": 0.7760070394994134, + "grad_norm": 11.117642644359238, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 769611412, + "step": 7937 + }, + { + "epoch": 0.7760070394994134, + "loss": 0.10200134664773941, + "loss_ce": 0.004009407013654709, + "loss_iou": 0.3046875, + "loss_num": 0.01953125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 769611412, + "step": 7937 + }, + { + "epoch": 0.7761048103245991, + "grad_norm": 22.568283940154455, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 769708608, + "step": 7938 + }, + { + "epoch": 0.7761048103245991, + "loss": 0.06374838948249817, + "loss_ce": 0.004239108879119158, + "loss_iou": 0.365234375, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 769708608, + "step": 7938 + }, + { + "epoch": 0.7762025811497849, + "grad_norm": 7.110040545743255, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 769805564, + "step": 7939 + }, + { + "epoch": 0.7762025811497849, + "loss": 0.10384491086006165, + "loss_ce": 0.005028993356972933, + "loss_iou": 0.390625, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 769805564, + "step": 7939 + }, + { + "epoch": 0.7763003519749707, + "grad_norm": 4.861965673834427, + "learning_rate": 5e-05, + "loss": 0.1279, + "num_input_tokens_seen": 769902096, + "step": 7940 + }, + { + "epoch": 0.7763003519749707, + "loss": 0.11771661788225174, + "loss_ce": 0.007075135596096516, + "loss_iou": 0.3125, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 769902096, + "step": 7940 + }, + { + "epoch": 0.7763981228001564, + "grad_norm": 6.321358778088876, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 769999528, + "step": 7941 + }, + { + "epoch": 0.7763981228001564, + "loss": 0.08865252137184143, + "loss_ce": 0.0032185621093958616, + "loss_iou": 0.2734375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 769999528, + "step": 7941 + }, + { + "epoch": 0.7764958936253422, + "grad_norm": 6.924247746374066, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 770096520, + "step": 7942 + }, + { + "epoch": 0.7764958936253422, + "loss": 0.053934697061777115, + "loss_ce": 0.0028787904884666204, + "loss_iou": 0.291015625, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 770096520, + "step": 7942 + }, + { + "epoch": 0.7765936644505279, + "grad_norm": 5.013849342563959, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 770193624, + "step": 7943 + }, + { + "epoch": 0.7765936644505279, + "loss": 0.0452856607735157, + "loss_ce": 0.002034625271335244, + "loss_iou": 0.255859375, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 770193624, + "step": 7943 + }, + { + "epoch": 0.7766914352757137, + "grad_norm": 21.190087535195527, + "learning_rate": 5e-05, + "loss": 0.0847, + "num_input_tokens_seen": 770290540, + "step": 7944 + }, + { + "epoch": 0.7766914352757137, + "loss": 0.08777676522731781, + "loss_ce": 0.0052267140708863735, + "loss_iou": 0.255859375, + "loss_num": 0.0166015625, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 770290540, + "step": 7944 + }, + { + "epoch": 0.7767892061008995, + "grad_norm": 8.88474050129291, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 770387428, + "step": 7945 + }, + { + "epoch": 0.7767892061008995, + "loss": 0.08381620049476624, + "loss_ce": 0.006454136222600937, + "loss_iou": 0.353515625, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 770387428, + "step": 7945 + }, + { + "epoch": 0.7768869769260852, + "grad_norm": 7.800793403979288, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 770484648, + "step": 7946 + }, + { + "epoch": 0.7768869769260852, + "loss": 0.11880537122488022, + "loss_ce": 0.004867991432547569, + "loss_iou": 0.2275390625, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 770484648, + "step": 7946 + }, + { + "epoch": 0.776984747751271, + "grad_norm": 4.952385258543393, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 770582492, + "step": 7947 + }, + { + "epoch": 0.776984747751271, + "loss": 0.10095039755105972, + "loss_ce": 0.007780230138450861, + "loss_iou": 0.232421875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 770582492, + "step": 7947 + }, + { + "epoch": 0.7770825185764568, + "grad_norm": 8.735353657504746, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 770680132, + "step": 7948 + }, + { + "epoch": 0.7770825185764568, + "loss": 0.08025644719600677, + "loss_ce": 0.008402803912758827, + "loss_iou": 0.28515625, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 770680132, + "step": 7948 + }, + { + "epoch": 0.7771802894016425, + "grad_norm": 15.021402547597342, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 770777712, + "step": 7949 + }, + { + "epoch": 0.7771802894016425, + "loss": 0.07940614223480225, + "loss_ce": 0.008452766574919224, + "loss_iou": 0.3515625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 770777712, + "step": 7949 + }, + { + "epoch": 0.7772780602268283, + "grad_norm": 29.92814824477135, + "learning_rate": 5e-05, + "loss": 0.102, + "num_input_tokens_seen": 770874100, + "step": 7950 + }, + { + "epoch": 0.7772780602268283, + "loss": 0.11555015295743942, + "loss_ce": 0.010096661746501923, + "loss_iou": 0.390625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 770874100, + "step": 7950 + }, + { + "epoch": 0.7773758310520141, + "grad_norm": 33.137009308213145, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 770972176, + "step": 7951 + }, + { + "epoch": 0.7773758310520141, + "loss": 0.05219114571809769, + "loss_ce": 0.003317242721095681, + "loss_iou": 0.345703125, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 770972176, + "step": 7951 + }, + { + "epoch": 0.7774736018771998, + "grad_norm": 16.512198135447658, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 771069372, + "step": 7952 + }, + { + "epoch": 0.7774736018771998, + "loss": 0.07393895089626312, + "loss_ce": 0.003443350549787283, + "loss_iou": 0.30078125, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 771069372, + "step": 7952 + }, + { + "epoch": 0.7775713727023856, + "grad_norm": 3.655841493614471, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 771165644, + "step": 7953 + }, + { + "epoch": 0.7775713727023856, + "loss": 0.06913522630929947, + "loss_ce": 0.004010720178484917, + "loss_iou": 0.33984375, + "loss_num": 0.0130615234375, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 771165644, + "step": 7953 + }, + { + "epoch": 0.7776691435275713, + "grad_norm": 7.050768884482461, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 771262688, + "step": 7954 + }, + { + "epoch": 0.7776691435275713, + "loss": 0.06956879794597626, + "loss_ce": 0.0075799645856022835, + "loss_iou": 0.2138671875, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 771262688, + "step": 7954 + }, + { + "epoch": 0.7777669143527571, + "grad_norm": 4.163862288619697, + "learning_rate": 5e-05, + "loss": 0.0872, + "num_input_tokens_seen": 771359888, + "step": 7955 + }, + { + "epoch": 0.7777669143527571, + "loss": 0.11408930271863937, + "loss_ce": 0.0029061390087008476, + "loss_iou": 0.380859375, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 771359888, + "step": 7955 + }, + { + "epoch": 0.7778646851779429, + "grad_norm": 11.866538648606637, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 771456512, + "step": 7956 + }, + { + "epoch": 0.7778646851779429, + "loss": 0.06124192103743553, + "loss_ce": 0.0029228287748992443, + "loss_iou": 0.36328125, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 771456512, + "step": 7956 + }, + { + "epoch": 0.7779624560031286, + "grad_norm": 15.867675414462632, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 771553020, + "step": 7957 + }, + { + "epoch": 0.7779624560031286, + "loss": 0.0877068042755127, + "loss_ce": 0.0068886298686265945, + "loss_iou": 0.28125, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 771553020, + "step": 7957 + }, + { + "epoch": 0.7780602268283144, + "grad_norm": 10.514360038783435, + "learning_rate": 5e-05, + "loss": 0.0929, + "num_input_tokens_seen": 771649792, + "step": 7958 + }, + { + "epoch": 0.7780602268283144, + "loss": 0.08639602363109589, + "loss_ce": 0.009110257029533386, + "loss_iou": 0.3125, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 771649792, + "step": 7958 + }, + { + "epoch": 0.7781579976535002, + "grad_norm": 16.187884096673937, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 771746636, + "step": 7959 + }, + { + "epoch": 0.7781579976535002, + "loss": 0.04948200285434723, + "loss_ce": 0.0039955563843250275, + "loss_iou": 0.2099609375, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 771746636, + "step": 7959 + }, + { + "epoch": 0.7782557684786859, + "grad_norm": 27.783944788380524, + "learning_rate": 5e-05, + "loss": 0.0998, + "num_input_tokens_seen": 771843496, + "step": 7960 + }, + { + "epoch": 0.7782557684786859, + "loss": 0.11864447593688965, + "loss_ce": 0.0052182674407958984, + "loss_iou": 0.1953125, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 771843496, + "step": 7960 + }, + { + "epoch": 0.7783535393038717, + "grad_norm": 16.49678927806074, + "learning_rate": 5e-05, + "loss": 0.0452, + "num_input_tokens_seen": 771939404, + "step": 7961 + }, + { + "epoch": 0.7783535393038717, + "loss": 0.052666448056697845, + "loss_ce": 0.009941840544342995, + "loss_iou": 0.3203125, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 771939404, + "step": 7961 + }, + { + "epoch": 0.7784513101290574, + "grad_norm": 6.661276549130204, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 772036388, + "step": 7962 + }, + { + "epoch": 0.7784513101290574, + "loss": 0.10395365953445435, + "loss_ce": 0.0069687929935753345, + "loss_iou": 0.310546875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 772036388, + "step": 7962 + }, + { + "epoch": 0.7785490809542432, + "grad_norm": 8.297815697232547, + "learning_rate": 5e-05, + "loss": 0.0827, + "num_input_tokens_seen": 772132448, + "step": 7963 + }, + { + "epoch": 0.7785490809542432, + "loss": 0.07999743521213531, + "loss_ce": 0.00328005850315094, + "loss_iou": 0.2333984375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 772132448, + "step": 7963 + }, + { + "epoch": 0.778646851779429, + "grad_norm": 19.45591677081691, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 772229412, + "step": 7964 + }, + { + "epoch": 0.778646851779429, + "loss": 0.08368012309074402, + "loss_ce": 0.005864108446985483, + "loss_iou": 0.2177734375, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 772229412, + "step": 7964 + }, + { + "epoch": 0.7787446226046147, + "grad_norm": 18.117160536547885, + "learning_rate": 5e-05, + "loss": 0.107, + "num_input_tokens_seen": 772326948, + "step": 7965 + }, + { + "epoch": 0.7787446226046147, + "loss": 0.10763627290725708, + "loss_ce": 0.00457841856405139, + "loss_iou": 0.359375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 772326948, + "step": 7965 + }, + { + "epoch": 0.7788423934298006, + "grad_norm": 7.218230749237985, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 772423488, + "step": 7966 + }, + { + "epoch": 0.7788423934298006, + "loss": 0.10469311475753784, + "loss_ce": 0.007692988030612469, + "loss_iou": 0.2890625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 772423488, + "step": 7966 + }, + { + "epoch": 0.7789401642549864, + "grad_norm": 2.706094783169943, + "learning_rate": 5e-05, + "loss": 0.1042, + "num_input_tokens_seen": 772520348, + "step": 7967 + }, + { + "epoch": 0.7789401642549864, + "loss": 0.1134365126490593, + "loss_ce": 0.007639697287231684, + "loss_iou": 0.1689453125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 772520348, + "step": 7967 + }, + { + "epoch": 0.779037935080172, + "grad_norm": 7.849969473866778, + "learning_rate": 5e-05, + "loss": 0.1016, + "num_input_tokens_seen": 772617504, + "step": 7968 + }, + { + "epoch": 0.779037935080172, + "loss": 0.10849437117576599, + "loss_ce": 0.00409374013543129, + "loss_iou": 0.283203125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 772617504, + "step": 7968 + }, + { + "epoch": 0.7791357059053579, + "grad_norm": 10.734709024301926, + "learning_rate": 5e-05, + "loss": 0.1087, + "num_input_tokens_seen": 772714872, + "step": 7969 + }, + { + "epoch": 0.7791357059053579, + "loss": 0.1064663827419281, + "loss_ce": 0.008588884025812149, + "loss_iou": 0.318359375, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 772714872, + "step": 7969 + }, + { + "epoch": 0.7792334767305436, + "grad_norm": 13.980985323787783, + "learning_rate": 5e-05, + "loss": 0.1293, + "num_input_tokens_seen": 772811724, + "step": 7970 + }, + { + "epoch": 0.7792334767305436, + "loss": 0.11285093426704407, + "loss_ce": 0.0048187109641730785, + "loss_iou": 0.310546875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 772811724, + "step": 7970 + }, + { + "epoch": 0.7793312475557294, + "grad_norm": 11.04461527299556, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 772908292, + "step": 7971 + }, + { + "epoch": 0.7793312475557294, + "loss": 0.08982180058956146, + "loss_ce": 0.005333881359547377, + "loss_iou": 0.30859375, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 772908292, + "step": 7971 + }, + { + "epoch": 0.7794290183809152, + "grad_norm": 11.145908290252956, + "learning_rate": 5e-05, + "loss": 0.1078, + "num_input_tokens_seen": 773005240, + "step": 7972 + }, + { + "epoch": 0.7794290183809152, + "loss": 0.13204042613506317, + "loss_ce": 0.007604995742440224, + "loss_iou": 0.29296875, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 773005240, + "step": 7972 + }, + { + "epoch": 0.7795267892061009, + "grad_norm": 8.793053897232367, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 773101808, + "step": 7973 + }, + { + "epoch": 0.7795267892061009, + "loss": 0.042109787464141846, + "loss_ce": 0.004771533887833357, + "loss_iou": 0.296875, + "loss_num": 0.007476806640625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 773101808, + "step": 7973 + }, + { + "epoch": 0.7796245600312867, + "grad_norm": 7.071687708724832, + "learning_rate": 5e-05, + "loss": 0.0647, + "num_input_tokens_seen": 773199260, + "step": 7974 + }, + { + "epoch": 0.7796245600312867, + "loss": 0.041539378464221954, + "loss_ce": 0.006306834518909454, + "loss_iou": 0.35546875, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 773199260, + "step": 7974 + }, + { + "epoch": 0.7797223308564725, + "grad_norm": 16.402761850851967, + "learning_rate": 5e-05, + "loss": 0.1572, + "num_input_tokens_seen": 773295688, + "step": 7975 + }, + { + "epoch": 0.7797223308564725, + "loss": 0.14643578231334686, + "loss_ce": 0.012608576565980911, + "loss_iou": 0.2255859375, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 773295688, + "step": 7975 + }, + { + "epoch": 0.7798201016816582, + "grad_norm": 22.891277921513705, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 773393092, + "step": 7976 + }, + { + "epoch": 0.7798201016816582, + "loss": 0.10817578434944153, + "loss_ce": 0.0034547115210443735, + "loss_iou": 0.298828125, + "loss_num": 0.02099609375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 773393092, + "step": 7976 + }, + { + "epoch": 0.779917872506844, + "grad_norm": 7.679900501601948, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 773490200, + "step": 7977 + }, + { + "epoch": 0.779917872506844, + "loss": 0.05618070438504219, + "loss_ce": 0.003247965360060334, + "loss_iou": 0.421875, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 773490200, + "step": 7977 + }, + { + "epoch": 0.7800156433320298, + "grad_norm": 3.8085511548628244, + "learning_rate": 5e-05, + "loss": 0.051, + "num_input_tokens_seen": 773587792, + "step": 7978 + }, + { + "epoch": 0.7800156433320298, + "loss": 0.05355458706617355, + "loss_ce": 0.0032158377580344677, + "loss_iou": 0.326171875, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 773587792, + "step": 7978 + }, + { + "epoch": 0.7801134141572155, + "grad_norm": 11.593669366310591, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 773684540, + "step": 7979 + }, + { + "epoch": 0.7801134141572155, + "loss": 0.08056437969207764, + "loss_ce": 0.0061014858074486256, + "loss_iou": 0.25390625, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 773684540, + "step": 7979 + }, + { + "epoch": 0.7802111849824013, + "grad_norm": 13.20819898268647, + "learning_rate": 5e-05, + "loss": 0.1004, + "num_input_tokens_seen": 773781348, + "step": 7980 + }, + { + "epoch": 0.7802111849824013, + "loss": 0.10926961898803711, + "loss_ce": 0.009013654664158821, + "loss_iou": 0.1923828125, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 773781348, + "step": 7980 + }, + { + "epoch": 0.780308955807587, + "grad_norm": 11.63175594541652, + "learning_rate": 5e-05, + "loss": 0.1073, + "num_input_tokens_seen": 773878112, + "step": 7981 + }, + { + "epoch": 0.780308955807587, + "loss": 0.14461499452590942, + "loss_ce": 0.005973639898002148, + "loss_iou": 0.326171875, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 773878112, + "step": 7981 + }, + { + "epoch": 0.7804067266327728, + "grad_norm": 6.577122888915012, + "learning_rate": 5e-05, + "loss": 0.0543, + "num_input_tokens_seen": 773976400, + "step": 7982 + }, + { + "epoch": 0.7804067266327728, + "loss": 0.05068361014127731, + "loss_ce": 0.004380812868475914, + "loss_iou": 0.228515625, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 773976400, + "step": 7982 + }, + { + "epoch": 0.7805044974579586, + "grad_norm": 3.188739183757838, + "learning_rate": 5e-05, + "loss": 0.1078, + "num_input_tokens_seen": 774072788, + "step": 7983 + }, + { + "epoch": 0.7805044974579586, + "loss": 0.12735140323638916, + "loss_ce": 0.012178061529994011, + "loss_iou": 0.3125, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 774072788, + "step": 7983 + }, + { + "epoch": 0.7806022682831443, + "grad_norm": 6.19256558221532, + "learning_rate": 5e-05, + "loss": 0.0605, + "num_input_tokens_seen": 774170316, + "step": 7984 + }, + { + "epoch": 0.7806022682831443, + "loss": 0.03968463093042374, + "loss_ce": 0.0022700829431414604, + "loss_iou": 0.26953125, + "loss_num": 0.007476806640625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 774170316, + "step": 7984 + }, + { + "epoch": 0.7807000391083301, + "grad_norm": 3.757364301832501, + "learning_rate": 5e-05, + "loss": 0.0497, + "num_input_tokens_seen": 774266892, + "step": 7985 + }, + { + "epoch": 0.7807000391083301, + "loss": 0.042858853936195374, + "loss_ce": 0.003865016158670187, + "loss_iou": 0.314453125, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 774266892, + "step": 7985 + }, + { + "epoch": 0.7807978099335159, + "grad_norm": 8.795039769251499, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 774363620, + "step": 7986 + }, + { + "epoch": 0.7807978099335159, + "loss": 0.06562471389770508, + "loss_ce": 0.004642962943762541, + "loss_iou": 0.1689453125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 774363620, + "step": 7986 + }, + { + "epoch": 0.7808955807587016, + "grad_norm": 3.8601712770056507, + "learning_rate": 5e-05, + "loss": 0.0585, + "num_input_tokens_seen": 774461188, + "step": 7987 + }, + { + "epoch": 0.7808955807587016, + "loss": 0.06300090998411179, + "loss_ce": 0.006070367526262999, + "loss_iou": 0.357421875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 774461188, + "step": 7987 + }, + { + "epoch": 0.7809933515838874, + "grad_norm": 10.657757669114185, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 774558772, + "step": 7988 + }, + { + "epoch": 0.7809933515838874, + "loss": 0.1157454401254654, + "loss_ce": 0.003578085917979479, + "loss_iou": 0.36328125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 774558772, + "step": 7988 + }, + { + "epoch": 0.7810911224090731, + "grad_norm": 10.135820808618755, + "learning_rate": 5e-05, + "loss": 0.1113, + "num_input_tokens_seen": 774656076, + "step": 7989 + }, + { + "epoch": 0.7810911224090731, + "loss": 0.15052157640457153, + "loss_ce": 0.004723841790109873, + "loss_iou": 0.357421875, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 774656076, + "step": 7989 + }, + { + "epoch": 0.7811888932342589, + "grad_norm": 5.635408442108096, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 774754404, + "step": 7990 + }, + { + "epoch": 0.7811888932342589, + "loss": 0.059648964554071426, + "loss_ce": 0.005876993760466576, + "loss_iou": 0.3984375, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 774754404, + "step": 7990 + }, + { + "epoch": 0.7812866640594447, + "grad_norm": 8.500029559013404, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 774850968, + "step": 7991 + }, + { + "epoch": 0.7812866640594447, + "loss": 0.08410850167274475, + "loss_ce": 0.006197124253958464, + "loss_iou": 0.30078125, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 774850968, + "step": 7991 + }, + { + "epoch": 0.7813844348846304, + "grad_norm": 10.097388005569718, + "learning_rate": 5e-05, + "loss": 0.0556, + "num_input_tokens_seen": 774948020, + "step": 7992 + }, + { + "epoch": 0.7813844348846304, + "loss": 0.05159495770931244, + "loss_ce": 0.005955922417342663, + "loss_iou": 0.34375, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 774948020, + "step": 7992 + }, + { + "epoch": 0.7814822057098162, + "grad_norm": 3.6386675340406467, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 775044544, + "step": 7993 + }, + { + "epoch": 0.7814822057098162, + "loss": 0.06987306475639343, + "loss_ce": 0.0014068814925849438, + "loss_iou": 0.267578125, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 775044544, + "step": 7993 + }, + { + "epoch": 0.781579976535002, + "grad_norm": 11.012053725093981, + "learning_rate": 5e-05, + "loss": 0.0681, + "num_input_tokens_seen": 775142256, + "step": 7994 + }, + { + "epoch": 0.781579976535002, + "loss": 0.06117803975939751, + "loss_ce": 0.006688904948532581, + "loss_iou": 0.296875, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 775142256, + "step": 7994 + }, + { + "epoch": 0.7816777473601877, + "grad_norm": 8.751883716068376, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 775239864, + "step": 7995 + }, + { + "epoch": 0.7816777473601877, + "loss": 0.05324453115463257, + "loss_ce": 0.005072535015642643, + "loss_iou": 0.294921875, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 775239864, + "step": 7995 + }, + { + "epoch": 0.7817755181853735, + "grad_norm": 7.309086864083986, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 775336320, + "step": 7996 + }, + { + "epoch": 0.7817755181853735, + "loss": 0.09692652523517609, + "loss_ce": 0.007029371336102486, + "loss_iou": 0.28125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 775336320, + "step": 7996 + }, + { + "epoch": 0.7818732890105593, + "grad_norm": 7.118865399802459, + "learning_rate": 5e-05, + "loss": 0.102, + "num_input_tokens_seen": 775432524, + "step": 7997 + }, + { + "epoch": 0.7818732890105593, + "loss": 0.0661451667547226, + "loss_ce": 0.004430993925780058, + "loss_iou": 0.3125, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 775432524, + "step": 7997 + }, + { + "epoch": 0.781971059835745, + "grad_norm": 11.991540042494659, + "learning_rate": 5e-05, + "loss": 0.1112, + "num_input_tokens_seen": 775530260, + "step": 7998 + }, + { + "epoch": 0.781971059835745, + "loss": 0.1302359402179718, + "loss_ce": 0.005586883053183556, + "loss_iou": 0.384765625, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 775530260, + "step": 7998 + }, + { + "epoch": 0.7820688306609308, + "grad_norm": 12.813527271597136, + "learning_rate": 5e-05, + "loss": 0.1001, + "num_input_tokens_seen": 775626536, + "step": 7999 + }, + { + "epoch": 0.7820688306609308, + "loss": 0.08019767701625824, + "loss_ce": 0.007821427658200264, + "loss_iou": 0.28125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 775626536, + "step": 7999 + }, + { + "epoch": 0.7821666014861165, + "grad_norm": 3.4930470596987684, + "learning_rate": 5e-05, + "loss": 0.0772, + "num_input_tokens_seen": 775723596, + "step": 8000 + }, + { + "epoch": 0.7821666014861165, + "eval_seeclick_CIoU": 0.5648827403783798, + "eval_seeclick_GIoU": 0.5703996866941452, + "eval_seeclick_IoU": 0.6012584865093231, + "eval_seeclick_MAE_all": 0.06519261375069618, + "eval_seeclick_MAE_h": 0.034458368085324764, + "eval_seeclick_MAE_w": 0.09194621071219444, + "eval_seeclick_MAE_x": 0.10021600127220154, + "eval_seeclick_MAE_y": 0.03414986375719309, + "eval_seeclick_NUM_probability": 0.9999981820583344, + "eval_seeclick_inside_bbox": 0.8309659063816071, + "eval_seeclick_loss": 0.2299092710018158, + "eval_seeclick_loss_ce": 0.010078650433570147, + "eval_seeclick_loss_iou": 0.392578125, + "eval_seeclick_loss_num": 0.043254852294921875, + "eval_seeclick_loss_xval": 0.2163848876953125, + "eval_seeclick_runtime": 80.7238, + "eval_seeclick_samples_per_second": 0.533, + "eval_seeclick_steps_per_second": 0.025, + "num_input_tokens_seen": 775723596, + "step": 8000 + }, + { + "epoch": 0.7821666014861165, + "eval_icons_CIoU": 0.6948885917663574, + "eval_icons_GIoU": 0.692289263010025, + "eval_icons_IoU": 0.7231766581535339, + "eval_icons_MAE_all": 0.05915473774075508, + "eval_icons_MAE_h": 0.06419389322400093, + "eval_icons_MAE_w": 0.054573386907577515, + "eval_icons_MAE_x": 0.05487033724784851, + "eval_icons_MAE_y": 0.06298132240772247, + "eval_icons_NUM_probability": 0.9999980926513672, + "eval_icons_inside_bbox": 0.8229166567325592, + "eval_icons_loss": 0.16768047213554382, + "eval_icons_loss_ce": 3.978652898695145e-06, + "eval_icons_loss_iou": 0.35076904296875, + "eval_icons_loss_num": 0.03555488586425781, + "eval_icons_loss_xval": 0.1777191162109375, + "eval_icons_runtime": 97.1795, + "eval_icons_samples_per_second": 0.515, + "eval_icons_steps_per_second": 0.021, + "num_input_tokens_seen": 775723596, + "step": 8000 + }, + { + "epoch": 0.7821666014861165, + "eval_screenspot_CIoU": 0.25634125123421353, + "eval_screenspot_GIoU": 0.239154651761055, + "eval_screenspot_IoU": 0.3687785565853119, + "eval_screenspot_MAE_all": 0.18175508826971054, + "eval_screenspot_MAE_h": 0.11959022283554077, + "eval_screenspot_MAE_w": 0.23789302508036295, + "eval_screenspot_MAE_x": 0.2511114627122879, + "eval_screenspot_MAE_y": 0.11842568467060725, + "eval_screenspot_NUM_probability": 0.999993642171224, + "eval_screenspot_inside_bbox": 0.5558333297570547, + "eval_screenspot_loss": 0.6316961646080017, + "eval_screenspot_loss_ce": 0.02102559618651867, + "eval_screenspot_loss_iou": 0.3402099609375, + "eval_screenspot_loss_num": 0.12591044108072916, + "eval_screenspot_loss_xval": 0.6296793619791666, + "eval_screenspot_runtime": 153.0792, + "eval_screenspot_samples_per_second": 0.581, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 775723596, + "step": 8000 + }, + { + "epoch": 0.7821666014861165, + "eval_compot_CIoU": 0.5064851641654968, + "eval_compot_GIoU": 0.49506625533103943, + "eval_compot_IoU": 0.5565301775932312, + "eval_compot_MAE_all": 0.08282733708620071, + "eval_compot_MAE_h": 0.0799199528992176, + "eval_compot_MAE_w": 0.08584629371762276, + "eval_compot_MAE_x": 0.0797610841691494, + "eval_compot_MAE_y": 0.08578202500939369, + "eval_compot_NUM_probability": 0.9999853670597076, + "eval_compot_inside_bbox": 0.7517361044883728, + "eval_compot_loss": 0.27256858348846436, + "eval_compot_loss_ce": 0.01873691938817501, + "eval_compot_loss_iou": 0.4342041015625, + "eval_compot_loss_num": 0.04495429992675781, + "eval_compot_loss_xval": 0.224639892578125, + "eval_compot_runtime": 87.6086, + "eval_compot_samples_per_second": 0.571, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 775723596, + "step": 8000 + }, + { + "epoch": 0.7821666014861165, + "loss": 0.22182950377464294, + "loss_ce": 0.01986417919397354, + "loss_iou": 0.421875, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 775723596, + "step": 8000 + }, + { + "epoch": 0.7822643723113023, + "grad_norm": 8.273754044158194, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 775821184, + "step": 8001 + }, + { + "epoch": 0.7822643723113023, + "loss": 0.07210825383663177, + "loss_ce": 0.007578832097351551, + "loss_iou": 0.330078125, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 775821184, + "step": 8001 + }, + { + "epoch": 0.7823621431364881, + "grad_norm": 15.71238006396762, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 775917908, + "step": 8002 + }, + { + "epoch": 0.7823621431364881, + "loss": 0.0587996169924736, + "loss_ce": 0.0056303683668375015, + "loss_iou": 0.265625, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 775917908, + "step": 8002 + }, + { + "epoch": 0.7824599139616738, + "grad_norm": 5.970826975602581, + "learning_rate": 5e-05, + "loss": 0.0504, + "num_input_tokens_seen": 776015264, + "step": 8003 + }, + { + "epoch": 0.7824599139616738, + "loss": 0.04285605996847153, + "loss_ce": 0.0023592303041368723, + "loss_iou": 0.30078125, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 776015264, + "step": 8003 + }, + { + "epoch": 0.7825576847868596, + "grad_norm": 9.456770130265648, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 776112500, + "step": 8004 + }, + { + "epoch": 0.7825576847868596, + "loss": 0.06201815605163574, + "loss_ce": 0.006170989479869604, + "loss_iou": 0.255859375, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 776112500, + "step": 8004 + }, + { + "epoch": 0.7826554556120454, + "grad_norm": 7.2031141408276245, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 776208908, + "step": 8005 + }, + { + "epoch": 0.7826554556120454, + "loss": 0.07567444443702698, + "loss_ce": 0.00499572791159153, + "loss_iou": 0.2080078125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 776208908, + "step": 8005 + }, + { + "epoch": 0.7827532264372311, + "grad_norm": 38.194798194138336, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 776305620, + "step": 8006 + }, + { + "epoch": 0.7827532264372311, + "loss": 0.07683403044939041, + "loss_ce": 0.0070861103013157845, + "loss_iou": 0.251953125, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 776305620, + "step": 8006 + }, + { + "epoch": 0.7828509972624169, + "grad_norm": 12.965411756371111, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 776402556, + "step": 8007 + }, + { + "epoch": 0.7828509972624169, + "loss": 0.09103572368621826, + "loss_ce": 0.010621904395520687, + "loss_iou": 0.392578125, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 776402556, + "step": 8007 + }, + { + "epoch": 0.7829487680876026, + "grad_norm": 7.963347161328216, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 776498912, + "step": 8008 + }, + { + "epoch": 0.7829487680876026, + "loss": 0.07342582195997238, + "loss_ce": 0.0034184972755610943, + "loss_iou": 0.19921875, + "loss_num": 0.0140380859375, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 776498912, + "step": 8008 + }, + { + "epoch": 0.7830465389127884, + "grad_norm": 2.7611289709807956, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 776595656, + "step": 8009 + }, + { + "epoch": 0.7830465389127884, + "loss": 0.08244820684194565, + "loss_ce": 0.004124844446778297, + "loss_iou": 0.27734375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 776595656, + "step": 8009 + }, + { + "epoch": 0.7831443097379742, + "grad_norm": 5.6237904743627745, + "learning_rate": 5e-05, + "loss": 0.0564, + "num_input_tokens_seen": 776692280, + "step": 8010 + }, + { + "epoch": 0.7831443097379742, + "loss": 0.08033595979213715, + "loss_ce": 0.005384783260524273, + "loss_iou": 0.283203125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 776692280, + "step": 8010 + }, + { + "epoch": 0.7832420805631599, + "grad_norm": 11.730499604983848, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 776788608, + "step": 8011 + }, + { + "epoch": 0.7832420805631599, + "loss": 0.04411114379763603, + "loss_ce": 0.0022486578673124313, + "loss_iou": 0.314453125, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 776788608, + "step": 8011 + }, + { + "epoch": 0.7833398513883457, + "grad_norm": 6.964443667938778, + "learning_rate": 5e-05, + "loss": 0.1027, + "num_input_tokens_seen": 776885928, + "step": 8012 + }, + { + "epoch": 0.7833398513883457, + "loss": 0.14731645584106445, + "loss_ce": 0.008415717631578445, + "loss_iou": 0.30078125, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 776885928, + "step": 8012 + }, + { + "epoch": 0.7834376222135315, + "grad_norm": 9.460247647518264, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 776982684, + "step": 8013 + }, + { + "epoch": 0.7834376222135315, + "loss": 0.0673021599650383, + "loss_ce": 0.0028032599948346615, + "loss_iou": 0.263671875, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 776982684, + "step": 8013 + }, + { + "epoch": 0.7835353930387172, + "grad_norm": 14.16159960364728, + "learning_rate": 5e-05, + "loss": 0.064, + "num_input_tokens_seen": 777079976, + "step": 8014 + }, + { + "epoch": 0.7835353930387172, + "loss": 0.06686387956142426, + "loss_ce": 0.006381856743246317, + "loss_iou": 0.271484375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 777079976, + "step": 8014 + }, + { + "epoch": 0.783633163863903, + "grad_norm": 7.934927402847184, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 777177524, + "step": 8015 + }, + { + "epoch": 0.783633163863903, + "loss": 0.08576343953609467, + "loss_ce": 0.004937628284096718, + "loss_iou": 0.232421875, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 777177524, + "step": 8015 + }, + { + "epoch": 0.7837309346890887, + "grad_norm": 8.759350652836105, + "learning_rate": 5e-05, + "loss": 0.0705, + "num_input_tokens_seen": 777274832, + "step": 8016 + }, + { + "epoch": 0.7837309346890887, + "loss": 0.06660088896751404, + "loss_ce": 0.005466556642204523, + "loss_iou": 0.2109375, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 777274832, + "step": 8016 + }, + { + "epoch": 0.7838287055142745, + "grad_norm": 5.80611517714033, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 777371212, + "step": 8017 + }, + { + "epoch": 0.7838287055142745, + "loss": 0.06191391497850418, + "loss_ce": 0.003713079961016774, + "loss_iou": 0.2890625, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 777371212, + "step": 8017 + }, + { + "epoch": 0.7839264763394603, + "grad_norm": 8.471494740085857, + "learning_rate": 5e-05, + "loss": 0.1152, + "num_input_tokens_seen": 777467260, + "step": 8018 + }, + { + "epoch": 0.7839264763394603, + "loss": 0.14918458461761475, + "loss_ce": 0.006408105604350567, + "loss_iou": 0.275390625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 777467260, + "step": 8018 + }, + { + "epoch": 0.784024247164646, + "grad_norm": 2.389275790012364, + "learning_rate": 5e-05, + "loss": 0.0417, + "num_input_tokens_seen": 777562576, + "step": 8019 + }, + { + "epoch": 0.784024247164646, + "loss": 0.02649777941405773, + "loss_ce": 0.0034875255078077316, + "loss_iou": 0.2001953125, + "loss_num": 0.004608154296875, + "loss_xval": 0.02294921875, + "num_input_tokens_seen": 777562576, + "step": 8019 + }, + { + "epoch": 0.7841220179898318, + "grad_norm": 6.902851723839786, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 777658928, + "step": 8020 + }, + { + "epoch": 0.7841220179898318, + "loss": 0.05584234744310379, + "loss_ce": 0.00392431765794754, + "loss_iou": 0.19140625, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 777658928, + "step": 8020 + }, + { + "epoch": 0.7842197888150176, + "grad_norm": 4.642098049254465, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 777755252, + "step": 8021 + }, + { + "epoch": 0.7842197888150176, + "loss": 0.06223316490650177, + "loss_ce": 0.0010263466974720359, + "loss_iou": 0.224609375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 777755252, + "step": 8021 + }, + { + "epoch": 0.7843175596402033, + "grad_norm": 2.460722660556593, + "learning_rate": 5e-05, + "loss": 0.0465, + "num_input_tokens_seen": 777852448, + "step": 8022 + }, + { + "epoch": 0.7843175596402033, + "loss": 0.046743862330913544, + "loss_ce": 0.003256315365433693, + "loss_iou": 0.177734375, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 777852448, + "step": 8022 + }, + { + "epoch": 0.7844153304653891, + "grad_norm": 12.376923953973725, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 777948520, + "step": 8023 + }, + { + "epoch": 0.7844153304653891, + "loss": 0.09732992947101593, + "loss_ce": 0.009790261276066303, + "loss_iou": 0.22265625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 777948520, + "step": 8023 + }, + { + "epoch": 0.7845131012905749, + "grad_norm": 8.205901675853113, + "learning_rate": 5e-05, + "loss": 0.0596, + "num_input_tokens_seen": 778045364, + "step": 8024 + }, + { + "epoch": 0.7845131012905749, + "loss": 0.055514395236968994, + "loss_ce": 0.0033751148730516434, + "loss_iou": 0.322265625, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 778045364, + "step": 8024 + }, + { + "epoch": 0.7846108721157606, + "grad_norm": 20.560532523091535, + "learning_rate": 5e-05, + "loss": 0.0961, + "num_input_tokens_seen": 778141656, + "step": 8025 + }, + { + "epoch": 0.7846108721157606, + "loss": 0.10206087678670883, + "loss_ce": 0.004587734118103981, + "loss_iou": 0.2890625, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 778141656, + "step": 8025 + }, + { + "epoch": 0.7847086429409464, + "grad_norm": 31.943076265936202, + "learning_rate": 5e-05, + "loss": 0.1028, + "num_input_tokens_seen": 778239024, + "step": 8026 + }, + { + "epoch": 0.7847086429409464, + "loss": 0.12100131809711456, + "loss_ce": 0.007872662506997585, + "loss_iou": 0.3125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 778239024, + "step": 8026 + }, + { + "epoch": 0.7848064137661321, + "grad_norm": 11.402957180625444, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 778336272, + "step": 8027 + }, + { + "epoch": 0.7848064137661321, + "loss": 0.06921359151601791, + "loss_ce": 0.0023610249627381563, + "loss_iou": 0.404296875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 778336272, + "step": 8027 + }, + { + "epoch": 0.7849041845913179, + "grad_norm": 7.364774489150489, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 778432360, + "step": 8028 + }, + { + "epoch": 0.7849041845913179, + "loss": 0.05911459028720856, + "loss_ce": 0.004640711937099695, + "loss_iou": 0.2158203125, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 778432360, + "step": 8028 + }, + { + "epoch": 0.7850019554165037, + "grad_norm": 20.715526602190735, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 778529508, + "step": 8029 + }, + { + "epoch": 0.7850019554165037, + "loss": 0.09115003049373627, + "loss_ce": 0.0036485036835074425, + "loss_iou": 0.267578125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 778529508, + "step": 8029 + }, + { + "epoch": 0.7850997262416894, + "grad_norm": 3.9797677530630837, + "learning_rate": 5e-05, + "loss": 0.0819, + "num_input_tokens_seen": 778626148, + "step": 8030 + }, + { + "epoch": 0.7850997262416894, + "loss": 0.07529895752668381, + "loss_ce": 0.001583743724040687, + "loss_iou": 0.26953125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 778626148, + "step": 8030 + }, + { + "epoch": 0.7851974970668752, + "grad_norm": 7.012852985269389, + "learning_rate": 5e-05, + "loss": 0.0717, + "num_input_tokens_seen": 778722944, + "step": 8031 + }, + { + "epoch": 0.7851974970668752, + "loss": 0.03757716715335846, + "loss_ce": 0.005487932823598385, + "loss_iou": 0.298828125, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 778722944, + "step": 8031 + }, + { + "epoch": 0.785295267892061, + "grad_norm": 9.392363605402997, + "learning_rate": 5e-05, + "loss": 0.067, + "num_input_tokens_seen": 778819828, + "step": 8032 + }, + { + "epoch": 0.785295267892061, + "loss": 0.07183672487735748, + "loss_ce": 0.006395593285560608, + "loss_iou": 0.2578125, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 778819828, + "step": 8032 + }, + { + "epoch": 0.7853930387172467, + "grad_norm": 4.864254348679057, + "learning_rate": 5e-05, + "loss": 0.0681, + "num_input_tokens_seen": 778917184, + "step": 8033 + }, + { + "epoch": 0.7853930387172467, + "loss": 0.06458839774131775, + "loss_ce": 0.006337969098240137, + "loss_iou": 0.21484375, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 778917184, + "step": 8033 + }, + { + "epoch": 0.7854908095424326, + "grad_norm": 9.339591934397394, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 779013472, + "step": 8034 + }, + { + "epoch": 0.7854908095424326, + "loss": 0.06669381260871887, + "loss_ce": 0.0020041719544678926, + "loss_iou": 0.263671875, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 779013472, + "step": 8034 + }, + { + "epoch": 0.7855885803676182, + "grad_norm": 8.375635446985369, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 779110296, + "step": 8035 + }, + { + "epoch": 0.7855885803676182, + "loss": 0.07456237822771072, + "loss_ce": 0.005424804519861937, + "loss_iou": 0.26171875, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 779110296, + "step": 8035 + }, + { + "epoch": 0.7856863511928041, + "grad_norm": 5.438793802166534, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 779207720, + "step": 8036 + }, + { + "epoch": 0.7856863511928041, + "loss": 0.06617406010627747, + "loss_ce": 0.006413014139980078, + "loss_iou": 0.244140625, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 779207720, + "step": 8036 + }, + { + "epoch": 0.7857841220179899, + "grad_norm": 4.5121688142100815, + "learning_rate": 5e-05, + "loss": 0.1017, + "num_input_tokens_seen": 779304708, + "step": 8037 + }, + { + "epoch": 0.7857841220179899, + "loss": 0.11646910756826401, + "loss_ce": 0.002638538833707571, + "loss_iou": 0.19921875, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 779304708, + "step": 8037 + }, + { + "epoch": 0.7858818928431756, + "grad_norm": 6.1015981017729795, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 779401704, + "step": 8038 + }, + { + "epoch": 0.7858818928431756, + "loss": 0.08236616849899292, + "loss_ce": 0.0017082075355574489, + "loss_iou": 0.298828125, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 779401704, + "step": 8038 + }, + { + "epoch": 0.7859796636683614, + "grad_norm": 12.286724132581774, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 779498772, + "step": 8039 + }, + { + "epoch": 0.7859796636683614, + "loss": 0.06180962175130844, + "loss_ce": 0.005046927370131016, + "loss_iou": 0.34765625, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 779498772, + "step": 8039 + }, + { + "epoch": 0.7860774344935472, + "grad_norm": 8.48608700418349, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 779595412, + "step": 8040 + }, + { + "epoch": 0.7860774344935472, + "loss": 0.05701300501823425, + "loss_ce": 0.003973457030951977, + "loss_iou": 0.384765625, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 779595412, + "step": 8040 + }, + { + "epoch": 0.7861752053187329, + "grad_norm": 9.37581064734428, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 779692552, + "step": 8041 + }, + { + "epoch": 0.7861752053187329, + "loss": 0.05580352619290352, + "loss_ce": 0.0049307262524962425, + "loss_iou": 0.306640625, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 779692552, + "step": 8041 + }, + { + "epoch": 0.7862729761439187, + "grad_norm": 3.971409438875176, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 779789116, + "step": 8042 + }, + { + "epoch": 0.7862729761439187, + "loss": 0.06618443876504898, + "loss_ce": 0.0038522863760590553, + "loss_iou": 0.291015625, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 779789116, + "step": 8042 + }, + { + "epoch": 0.7863707469691045, + "grad_norm": 40.5143968195831, + "learning_rate": 5e-05, + "loss": 0.0518, + "num_input_tokens_seen": 779886292, + "step": 8043 + }, + { + "epoch": 0.7863707469691045, + "loss": 0.06359536200761795, + "loss_ce": 0.005947654135525227, + "loss_iou": 0.40625, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 779886292, + "step": 8043 + }, + { + "epoch": 0.7864685177942902, + "grad_norm": 13.852299364439256, + "learning_rate": 5e-05, + "loss": 0.088, + "num_input_tokens_seen": 779982940, + "step": 8044 + }, + { + "epoch": 0.7864685177942902, + "loss": 0.07840435206890106, + "loss_ce": 0.0067414576187729836, + "loss_iou": 0.333984375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 779982940, + "step": 8044 + }, + { + "epoch": 0.786566288619476, + "grad_norm": 2.2808527489561343, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 780080292, + "step": 8045 + }, + { + "epoch": 0.786566288619476, + "loss": 0.035538025200366974, + "loss_ce": 0.0070040905848145485, + "loss_iou": 0.224609375, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 780080292, + "step": 8045 + }, + { + "epoch": 0.7866640594446617, + "grad_norm": 8.728558863442007, + "learning_rate": 5e-05, + "loss": 0.0526, + "num_input_tokens_seen": 780176148, + "step": 8046 + }, + { + "epoch": 0.7866640594446617, + "loss": 0.03568238019943237, + "loss_ce": 0.0027233967557549477, + "loss_iou": 0.140625, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 780176148, + "step": 8046 + }, + { + "epoch": 0.7867618302698475, + "grad_norm": 24.388066401043602, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 780272828, + "step": 8047 + }, + { + "epoch": 0.7867618302698475, + "loss": 0.06221242994070053, + "loss_ce": 0.007296049501746893, + "loss_iou": 0.2236328125, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 780272828, + "step": 8047 + }, + { + "epoch": 0.7868596010950333, + "grad_norm": 3.1146842328338793, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 780368648, + "step": 8048 + }, + { + "epoch": 0.7868596010950333, + "loss": 0.10978244245052338, + "loss_ce": 0.004134397488087416, + "loss_iou": 0.2177734375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 780368648, + "step": 8048 + }, + { + "epoch": 0.786957371920219, + "grad_norm": 28.491858230235117, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 780465676, + "step": 8049 + }, + { + "epoch": 0.786957371920219, + "loss": 0.08304610848426819, + "loss_ce": 0.006248628720641136, + "loss_iou": 0.1865234375, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 780465676, + "step": 8049 + }, + { + "epoch": 0.7870551427454048, + "grad_norm": 7.052358291942668, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 780562828, + "step": 8050 + }, + { + "epoch": 0.7870551427454048, + "loss": 0.07789841294288635, + "loss_ce": 0.006182108074426651, + "loss_iou": 0.2734375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 780562828, + "step": 8050 + }, + { + "epoch": 0.7871529135705906, + "grad_norm": 21.107552051663518, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 780659912, + "step": 8051 + }, + { + "epoch": 0.7871529135705906, + "loss": 0.06903524696826935, + "loss_ce": 0.006352143362164497, + "loss_iou": 0.29296875, + "loss_num": 0.0125732421875, + "loss_xval": 0.0625, + "num_input_tokens_seen": 780659912, + "step": 8051 + }, + { + "epoch": 0.7872506843957763, + "grad_norm": 6.362127149036508, + "learning_rate": 5e-05, + "loss": 0.0899, + "num_input_tokens_seen": 780755996, + "step": 8052 + }, + { + "epoch": 0.7872506843957763, + "loss": 0.08344953507184982, + "loss_ce": 0.003005202626809478, + "loss_iou": 0.314453125, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 780755996, + "step": 8052 + }, + { + "epoch": 0.7873484552209621, + "grad_norm": 19.30634635117287, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 780853292, + "step": 8053 + }, + { + "epoch": 0.7873484552209621, + "loss": 0.12571024894714355, + "loss_ce": 0.011269336566329002, + "loss_iou": 0.33984375, + "loss_num": 0.02294921875, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 780853292, + "step": 8053 + }, + { + "epoch": 0.7874462260461478, + "grad_norm": 7.364351719769526, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 780950596, + "step": 8054 + }, + { + "epoch": 0.7874462260461478, + "loss": 0.07771947979927063, + "loss_ce": 0.004401003010571003, + "loss_iou": 0.333984375, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 780950596, + "step": 8054 + }, + { + "epoch": 0.7875439968713336, + "grad_norm": 4.70428160320615, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 781047536, + "step": 8055 + }, + { + "epoch": 0.7875439968713336, + "loss": 0.08209846913814545, + "loss_ce": 0.006796346977353096, + "loss_iou": 0.2412109375, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 781047536, + "step": 8055 + }, + { + "epoch": 0.7876417676965194, + "grad_norm": 4.1818961437865205, + "learning_rate": 5e-05, + "loss": 0.0682, + "num_input_tokens_seen": 781144868, + "step": 8056 + }, + { + "epoch": 0.7876417676965194, + "loss": 0.07092580199241638, + "loss_ce": 0.004778947681188583, + "loss_iou": 0.32421875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 781144868, + "step": 8056 + }, + { + "epoch": 0.7877395385217051, + "grad_norm": 4.539524122964078, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 781242688, + "step": 8057 + }, + { + "epoch": 0.7877395385217051, + "loss": 0.0518854483962059, + "loss_ce": 0.0037897462025284767, + "loss_iou": 0.291015625, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 781242688, + "step": 8057 + }, + { + "epoch": 0.7878373093468909, + "grad_norm": 7.06353314880689, + "learning_rate": 5e-05, + "loss": 0.101, + "num_input_tokens_seen": 781339860, + "step": 8058 + }, + { + "epoch": 0.7878373093468909, + "loss": 0.11889754235744476, + "loss_ce": 0.008393391966819763, + "loss_iou": 0.2734375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 781339860, + "step": 8058 + }, + { + "epoch": 0.7879350801720767, + "grad_norm": 6.30889297293238, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 781436616, + "step": 8059 + }, + { + "epoch": 0.7879350801720767, + "loss": 0.0469953790307045, + "loss_ce": 0.0007002120837569237, + "loss_iou": 0.32421875, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 781436616, + "step": 8059 + }, + { + "epoch": 0.7880328509972624, + "grad_norm": 11.45191681158405, + "learning_rate": 5e-05, + "loss": 0.0709, + "num_input_tokens_seen": 781533500, + "step": 8060 + }, + { + "epoch": 0.7880328509972624, + "loss": 0.08024992048740387, + "loss_ce": 0.003055705688893795, + "loss_iou": 0.3125, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 781533500, + "step": 8060 + }, + { + "epoch": 0.7881306218224482, + "grad_norm": 6.268210415866731, + "learning_rate": 5e-05, + "loss": 0.0812, + "num_input_tokens_seen": 781630792, + "step": 8061 + }, + { + "epoch": 0.7881306218224482, + "loss": 0.05094488710165024, + "loss_ce": 0.0029178473632782698, + "loss_iou": 0.310546875, + "loss_num": 0.00958251953125, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 781630792, + "step": 8061 + }, + { + "epoch": 0.7882283926476339, + "grad_norm": 5.699225939422226, + "learning_rate": 5e-05, + "loss": 0.1105, + "num_input_tokens_seen": 781728332, + "step": 8062 + }, + { + "epoch": 0.7882283926476339, + "loss": 0.12330933660268784, + "loss_ce": 0.008609017357230186, + "loss_iou": 0.29296875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 781728332, + "step": 8062 + }, + { + "epoch": 0.7883261634728197, + "grad_norm": 10.803056420380413, + "learning_rate": 5e-05, + "loss": 0.0879, + "num_input_tokens_seen": 781825168, + "step": 8063 + }, + { + "epoch": 0.7883261634728197, + "loss": 0.058681126683950424, + "loss_ce": 0.009280798956751823, + "loss_iou": 0.283203125, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 781825168, + "step": 8063 + }, + { + "epoch": 0.7884239342980055, + "grad_norm": 4.961491544630334, + "learning_rate": 5e-05, + "loss": 0.1326, + "num_input_tokens_seen": 781921608, + "step": 8064 + }, + { + "epoch": 0.7884239342980055, + "loss": 0.15818393230438232, + "loss_ce": 0.008823288604617119, + "loss_iou": 0.3515625, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 781921608, + "step": 8064 + }, + { + "epoch": 0.7885217051231912, + "grad_norm": 7.35188777668955, + "learning_rate": 5e-05, + "loss": 0.1093, + "num_input_tokens_seen": 782017888, + "step": 8065 + }, + { + "epoch": 0.7885217051231912, + "loss": 0.07341233640909195, + "loss_ce": 0.008585370145738125, + "loss_iou": 0.13671875, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 782017888, + "step": 8065 + }, + { + "epoch": 0.788619475948377, + "grad_norm": 12.00127135428994, + "learning_rate": 5e-05, + "loss": 0.071, + "num_input_tokens_seen": 782114640, + "step": 8066 + }, + { + "epoch": 0.788619475948377, + "loss": 0.06667652726173401, + "loss_ce": 0.0021547330543398857, + "loss_iou": 0.2470703125, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 782114640, + "step": 8066 + }, + { + "epoch": 0.7887172467735628, + "grad_norm": 15.75499557592547, + "learning_rate": 5e-05, + "loss": 0.1145, + "num_input_tokens_seen": 782211832, + "step": 8067 + }, + { + "epoch": 0.7887172467735628, + "loss": 0.1504903882741928, + "loss_ce": 0.00724087655544281, + "loss_iou": 0.33984375, + "loss_num": 0.028564453125, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 782211832, + "step": 8067 + }, + { + "epoch": 0.7888150175987485, + "grad_norm": 5.514141773891696, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 782309832, + "step": 8068 + }, + { + "epoch": 0.7888150175987485, + "loss": 0.04996117204427719, + "loss_ce": 0.003017507027834654, + "loss_iou": 0.390625, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 782309832, + "step": 8068 + }, + { + "epoch": 0.7889127884239343, + "grad_norm": 5.581388547396978, + "learning_rate": 5e-05, + "loss": 0.0588, + "num_input_tokens_seen": 782407536, + "step": 8069 + }, + { + "epoch": 0.7889127884239343, + "loss": 0.07794392108917236, + "loss_ce": 0.006593819707632065, + "loss_iou": 0.306640625, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 782407536, + "step": 8069 + }, + { + "epoch": 0.7890105592491201, + "grad_norm": 6.118206580171232, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 782505220, + "step": 8070 + }, + { + "epoch": 0.7890105592491201, + "loss": 0.05812172219157219, + "loss_ce": 0.003357927780598402, + "loss_iou": 0.26171875, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 782505220, + "step": 8070 + }, + { + "epoch": 0.7891083300743058, + "grad_norm": 4.889439700251374, + "learning_rate": 5e-05, + "loss": 0.0858, + "num_input_tokens_seen": 782602584, + "step": 8071 + }, + { + "epoch": 0.7891083300743058, + "loss": 0.09398480504751205, + "loss_ce": 0.00822278019040823, + "loss_iou": 0.310546875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 782602584, + "step": 8071 + }, + { + "epoch": 0.7892061008994916, + "grad_norm": 7.645261153899596, + "learning_rate": 5e-05, + "loss": 0.0975, + "num_input_tokens_seen": 782699624, + "step": 8072 + }, + { + "epoch": 0.7892061008994916, + "loss": 0.09581391513347626, + "loss_ce": 0.00873200036585331, + "loss_iou": 0.28125, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 782699624, + "step": 8072 + }, + { + "epoch": 0.7893038717246773, + "grad_norm": 4.625438955418478, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 782796276, + "step": 8073 + }, + { + "epoch": 0.7893038717246773, + "loss": 0.05803733319044113, + "loss_ce": 0.0039678169414401054, + "loss_iou": 0.287109375, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 782796276, + "step": 8073 + }, + { + "epoch": 0.7894016425498631, + "grad_norm": 9.89371154306445, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 782893480, + "step": 8074 + }, + { + "epoch": 0.7894016425498631, + "loss": 0.09772685170173645, + "loss_ce": 0.005640067160129547, + "loss_iou": 0.322265625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 782893480, + "step": 8074 + }, + { + "epoch": 0.7894994133750489, + "grad_norm": 10.160139707888241, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 782990732, + "step": 8075 + }, + { + "epoch": 0.7894994133750489, + "loss": 0.0686202198266983, + "loss_ce": 0.001763839041814208, + "loss_iou": 0.263671875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 782990732, + "step": 8075 + }, + { + "epoch": 0.7895971842002346, + "grad_norm": 11.0885870666785, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 783087256, + "step": 8076 + }, + { + "epoch": 0.7895971842002346, + "loss": 0.09092067182064056, + "loss_ce": 0.00389216560870409, + "loss_iou": 0.287109375, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 783087256, + "step": 8076 + }, + { + "epoch": 0.7896949550254204, + "grad_norm": 10.693709267285175, + "learning_rate": 5e-05, + "loss": 0.0758, + "num_input_tokens_seen": 783184272, + "step": 8077 + }, + { + "epoch": 0.7896949550254204, + "loss": 0.08833832293748856, + "loss_ce": 0.006185000762343407, + "loss_iou": 0.310546875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 783184272, + "step": 8077 + }, + { + "epoch": 0.7897927258506062, + "grad_norm": 7.558283714047323, + "learning_rate": 5e-05, + "loss": 0.0545, + "num_input_tokens_seen": 783280940, + "step": 8078 + }, + { + "epoch": 0.7897927258506062, + "loss": 0.062126800417900085, + "loss_ce": 0.0034414948895573616, + "loss_iou": 0.302734375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 783280940, + "step": 8078 + }, + { + "epoch": 0.7898904966757919, + "grad_norm": 17.403719515891147, + "learning_rate": 5e-05, + "loss": 0.0691, + "num_input_tokens_seen": 783378272, + "step": 8079 + }, + { + "epoch": 0.7898904966757919, + "loss": 0.0586281344294548, + "loss_ce": 0.010578206740319729, + "loss_iou": 0.1904296875, + "loss_num": 0.00958251953125, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 783378272, + "step": 8079 + }, + { + "epoch": 0.7899882675009777, + "grad_norm": 9.239667736289901, + "learning_rate": 5e-05, + "loss": 0.0935, + "num_input_tokens_seen": 783475092, + "step": 8080 + }, + { + "epoch": 0.7899882675009777, + "loss": 0.07009078562259674, + "loss_ce": 0.006843105424195528, + "loss_iou": 0.30078125, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 783475092, + "step": 8080 + }, + { + "epoch": 0.7900860383261634, + "grad_norm": 6.112744023740525, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 783571656, + "step": 8081 + }, + { + "epoch": 0.7900860383261634, + "loss": 0.09120858460664749, + "loss_ce": 0.007056360598653555, + "loss_iou": 0.24609375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 783571656, + "step": 8081 + }, + { + "epoch": 0.7901838091513492, + "grad_norm": 7.235035819477297, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 783669000, + "step": 8082 + }, + { + "epoch": 0.7901838091513492, + "loss": 0.09987234324216843, + "loss_ce": 0.007343044970184565, + "loss_iou": 0.341796875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 783669000, + "step": 8082 + }, + { + "epoch": 0.790281579976535, + "grad_norm": 8.336874619104746, + "learning_rate": 5e-05, + "loss": 0.1054, + "num_input_tokens_seen": 783765652, + "step": 8083 + }, + { + "epoch": 0.790281579976535, + "loss": 0.11295454949140549, + "loss_ce": 0.008614949882030487, + "loss_iou": 0.30078125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 783765652, + "step": 8083 + }, + { + "epoch": 0.7903793508017207, + "grad_norm": 4.723383807366505, + "learning_rate": 5e-05, + "loss": 0.0685, + "num_input_tokens_seen": 783862468, + "step": 8084 + }, + { + "epoch": 0.7903793508017207, + "loss": 0.07493595033884048, + "loss_ce": 0.0055389790795743465, + "loss_iou": 0.30078125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 783862468, + "step": 8084 + }, + { + "epoch": 0.7904771216269065, + "grad_norm": 8.330886953988562, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 783959288, + "step": 8085 + }, + { + "epoch": 0.7904771216269065, + "loss": 0.07359437644481659, + "loss_ce": 0.002335831755772233, + "loss_iou": 0.3359375, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 783959288, + "step": 8085 + }, + { + "epoch": 0.7905748924520923, + "grad_norm": 2.722378171744988, + "learning_rate": 5e-05, + "loss": 0.0496, + "num_input_tokens_seen": 784055912, + "step": 8086 + }, + { + "epoch": 0.7905748924520923, + "loss": 0.056968189775943756, + "loss_ce": 0.007125353906303644, + "loss_iou": 0.26171875, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 784055912, + "step": 8086 + }, + { + "epoch": 0.790672663277278, + "grad_norm": 12.902393561108935, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 784153652, + "step": 8087 + }, + { + "epoch": 0.790672663277278, + "loss": 0.07898470014333725, + "loss_ce": 0.006894552614539862, + "loss_iou": 0.333984375, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 784153652, + "step": 8087 + }, + { + "epoch": 0.7907704341024638, + "grad_norm": 13.464015805685785, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 784250004, + "step": 8088 + }, + { + "epoch": 0.7907704341024638, + "loss": 0.08824806660413742, + "loss_ce": 0.007349781692028046, + "loss_iou": 0.2470703125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 784250004, + "step": 8088 + }, + { + "epoch": 0.7908682049276496, + "grad_norm": 3.454513170076571, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 784346492, + "step": 8089 + }, + { + "epoch": 0.7908682049276496, + "loss": 0.08397893607616425, + "loss_ce": 0.0022070864215493202, + "loss_iou": 0.216796875, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 784346492, + "step": 8089 + }, + { + "epoch": 0.7909659757528353, + "grad_norm": 7.466074668087594, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 784443388, + "step": 8090 + }, + { + "epoch": 0.7909659757528353, + "loss": 0.09865978360176086, + "loss_ce": 0.002887996146455407, + "loss_iou": 0.2255859375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 784443388, + "step": 8090 + }, + { + "epoch": 0.7910637465780211, + "grad_norm": 4.919961904021783, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 784540292, + "step": 8091 + }, + { + "epoch": 0.7910637465780211, + "loss": 0.06812885403633118, + "loss_ce": 0.005190165713429451, + "loss_iou": 0.28125, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 784540292, + "step": 8091 + }, + { + "epoch": 0.7911615174032068, + "grad_norm": 27.71504484400247, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 784637424, + "step": 8092 + }, + { + "epoch": 0.7911615174032068, + "loss": 0.09153673052787781, + "loss_ce": 0.008467884734272957, + "loss_iou": 0.33203125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 784637424, + "step": 8092 + }, + { + "epoch": 0.7912592882283926, + "grad_norm": 6.253212714369619, + "learning_rate": 5e-05, + "loss": 0.1065, + "num_input_tokens_seen": 784733948, + "step": 8093 + }, + { + "epoch": 0.7912592882283926, + "loss": 0.11827673017978668, + "loss_ce": 0.006185661070048809, + "loss_iou": 0.27734375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 784733948, + "step": 8093 + }, + { + "epoch": 0.7913570590535784, + "grad_norm": 12.99333482768417, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 784831636, + "step": 8094 + }, + { + "epoch": 0.7913570590535784, + "loss": 0.08993631601333618, + "loss_ce": 0.004975374322384596, + "loss_iou": 0.2109375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 784831636, + "step": 8094 + }, + { + "epoch": 0.7914548298787641, + "grad_norm": 1.9246776872043565, + "learning_rate": 5e-05, + "loss": 0.1021, + "num_input_tokens_seen": 784929416, + "step": 8095 + }, + { + "epoch": 0.7914548298787641, + "loss": 0.12675614655017853, + "loss_ce": 0.007173024117946625, + "loss_iou": 0.275390625, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 784929416, + "step": 8095 + }, + { + "epoch": 0.79155260070395, + "grad_norm": 2.330769123043584, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 785025840, + "step": 8096 + }, + { + "epoch": 0.79155260070395, + "loss": 0.05735098198056221, + "loss_ce": 0.006447661202400923, + "loss_iou": 0.1748046875, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 785025840, + "step": 8096 + }, + { + "epoch": 0.7916503715291358, + "grad_norm": 8.547513406715264, + "learning_rate": 5e-05, + "loss": 0.0561, + "num_input_tokens_seen": 785122024, + "step": 8097 + }, + { + "epoch": 0.7916503715291358, + "loss": 0.047576576471328735, + "loss_ce": 0.0015942160971462727, + "loss_iou": 0.255859375, + "loss_num": 0.00921630859375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 785122024, + "step": 8097 + }, + { + "epoch": 0.7917481423543214, + "grad_norm": 5.570689270364462, + "learning_rate": 5e-05, + "loss": 0.077, + "num_input_tokens_seen": 785219004, + "step": 8098 + }, + { + "epoch": 0.7917481423543214, + "loss": 0.09376776963472366, + "loss_ce": 0.003328924300149083, + "loss_iou": 0.2421875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 785219004, + "step": 8098 + }, + { + "epoch": 0.7918459131795073, + "grad_norm": 5.162564016133992, + "learning_rate": 5e-05, + "loss": 0.0951, + "num_input_tokens_seen": 785315916, + "step": 8099 + }, + { + "epoch": 0.7918459131795073, + "loss": 0.10593512654304504, + "loss_ce": 0.006691953632980585, + "loss_iou": 0.30078125, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 785315916, + "step": 8099 + }, + { + "epoch": 0.791943684004693, + "grad_norm": 9.110603906830736, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 785412912, + "step": 8100 + }, + { + "epoch": 0.791943684004693, + "loss": 0.12809590995311737, + "loss_ce": 0.0035002687945961952, + "loss_iou": 0.263671875, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 785412912, + "step": 8100 + }, + { + "epoch": 0.7920414548298788, + "grad_norm": 3.535286678655385, + "learning_rate": 5e-05, + "loss": 0.0923, + "num_input_tokens_seen": 785510012, + "step": 8101 + }, + { + "epoch": 0.7920414548298788, + "loss": 0.11973335593938828, + "loss_ce": 0.006627581547945738, + "loss_iou": 0.298828125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 785510012, + "step": 8101 + }, + { + "epoch": 0.7921392256550646, + "grad_norm": 5.660331144419576, + "learning_rate": 5e-05, + "loss": 0.0928, + "num_input_tokens_seen": 785606212, + "step": 8102 + }, + { + "epoch": 0.7921392256550646, + "loss": 0.10845103114843369, + "loss_ce": 0.005545757710933685, + "loss_iou": 0.25390625, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 785606212, + "step": 8102 + }, + { + "epoch": 0.7922369964802503, + "grad_norm": 5.009256131443829, + "learning_rate": 5e-05, + "loss": 0.0819, + "num_input_tokens_seen": 785704052, + "step": 8103 + }, + { + "epoch": 0.7922369964802503, + "loss": 0.07729493081569672, + "loss_ce": 0.009980782866477966, + "loss_iou": 0.1708984375, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 785704052, + "step": 8103 + }, + { + "epoch": 0.7923347673054361, + "grad_norm": 4.638061898009849, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 785801452, + "step": 8104 + }, + { + "epoch": 0.7923347673054361, + "loss": 0.09603598713874817, + "loss_ce": 0.005032569169998169, + "loss_iou": 0.271484375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 785801452, + "step": 8104 + }, + { + "epoch": 0.7924325381306219, + "grad_norm": 28.829410435168242, + "learning_rate": 5e-05, + "loss": 0.1072, + "num_input_tokens_seen": 785898000, + "step": 8105 + }, + { + "epoch": 0.7924325381306219, + "loss": 0.11300204694271088, + "loss_ce": 0.005366549827158451, + "loss_iou": 0.23046875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 785898000, + "step": 8105 + }, + { + "epoch": 0.7925303089558076, + "grad_norm": 11.89752837195704, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 785994572, + "step": 8106 + }, + { + "epoch": 0.7925303089558076, + "loss": 0.05423908308148384, + "loss_ce": 0.006143382750451565, + "loss_iou": 0.2734375, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 785994572, + "step": 8106 + }, + { + "epoch": 0.7926280797809934, + "grad_norm": 8.281272476243, + "learning_rate": 5e-05, + "loss": 0.0787, + "num_input_tokens_seen": 786092196, + "step": 8107 + }, + { + "epoch": 0.7926280797809934, + "loss": 0.05495626851916313, + "loss_ce": 0.0033968198113143444, + "loss_iou": 0.224609375, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 786092196, + "step": 8107 + }, + { + "epoch": 0.7927258506061791, + "grad_norm": 11.005000274332865, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 786188760, + "step": 8108 + }, + { + "epoch": 0.7927258506061791, + "loss": 0.04594806581735611, + "loss_ce": 0.0070610446855425835, + "loss_iou": 0.234375, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 786188760, + "step": 8108 + }, + { + "epoch": 0.7928236214313649, + "grad_norm": 11.803303165027778, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 786286088, + "step": 8109 + }, + { + "epoch": 0.7928236214313649, + "loss": 0.08468082547187805, + "loss_ce": 0.004999426193535328, + "loss_iou": 0.27734375, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 786286088, + "step": 8109 + }, + { + "epoch": 0.7929213922565507, + "grad_norm": 12.839213878036135, + "learning_rate": 5e-05, + "loss": 0.0645, + "num_input_tokens_seen": 786382324, + "step": 8110 + }, + { + "epoch": 0.7929213922565507, + "loss": 0.09251154959201813, + "loss_ce": 0.003964792005717754, + "loss_iou": 0.1875, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 786382324, + "step": 8110 + }, + { + "epoch": 0.7930191630817364, + "grad_norm": 18.249488130101607, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 786479508, + "step": 8111 + }, + { + "epoch": 0.7930191630817364, + "loss": 0.06712993234395981, + "loss_ce": 0.0061405557207763195, + "loss_iou": 0.330078125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 786479508, + "step": 8111 + }, + { + "epoch": 0.7931169339069222, + "grad_norm": 22.070607528881894, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 786576576, + "step": 8112 + }, + { + "epoch": 0.7931169339069222, + "loss": 0.07186876237392426, + "loss_ce": 0.003295762464404106, + "loss_iou": 0.32421875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 786576576, + "step": 8112 + }, + { + "epoch": 0.793214704732108, + "grad_norm": 7.439013065002409, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 786674860, + "step": 8113 + }, + { + "epoch": 0.793214704732108, + "loss": 0.04074672982096672, + "loss_ce": 0.004705470986664295, + "loss_iou": 0.38671875, + "loss_num": 0.0072021484375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 786674860, + "step": 8113 + }, + { + "epoch": 0.7933124755572937, + "grad_norm": 3.8316950301408133, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 786771448, + "step": 8114 + }, + { + "epoch": 0.7933124755572937, + "loss": 0.041884128004312515, + "loss_ce": 0.007897083647549152, + "loss_iou": 0.2734375, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 786771448, + "step": 8114 + }, + { + "epoch": 0.7934102463824795, + "grad_norm": 6.728305363913888, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 786868708, + "step": 8115 + }, + { + "epoch": 0.7934102463824795, + "loss": 0.07797791808843613, + "loss_ce": 0.008306287229061127, + "loss_iou": 0.283203125, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 786868708, + "step": 8115 + }, + { + "epoch": 0.7935080172076653, + "grad_norm": 20.03189568631055, + "learning_rate": 5e-05, + "loss": 0.0688, + "num_input_tokens_seen": 786965324, + "step": 8116 + }, + { + "epoch": 0.7935080172076653, + "loss": 0.07206746935844421, + "loss_ce": 0.0066072652116417885, + "loss_iou": 0.328125, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 786965324, + "step": 8116 + }, + { + "epoch": 0.793605788032851, + "grad_norm": 54.438567898881125, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 787062356, + "step": 8117 + }, + { + "epoch": 0.793605788032851, + "loss": 0.06072806566953659, + "loss_ce": 0.003736487589776516, + "loss_iou": 0.33203125, + "loss_num": 0.01141357421875, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 787062356, + "step": 8117 + }, + { + "epoch": 0.7937035588580368, + "grad_norm": 8.18824387682074, + "learning_rate": 5e-05, + "loss": 0.0974, + "num_input_tokens_seen": 787157492, + "step": 8118 + }, + { + "epoch": 0.7937035588580368, + "loss": 0.10002283751964569, + "loss_ce": 0.0033660312183201313, + "loss_iou": 0.3046875, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 787157492, + "step": 8118 + }, + { + "epoch": 0.7938013296832225, + "grad_norm": 11.29080883496827, + "learning_rate": 5e-05, + "loss": 0.0632, + "num_input_tokens_seen": 787254356, + "step": 8119 + }, + { + "epoch": 0.7938013296832225, + "loss": 0.04023715853691101, + "loss_ce": 0.004394260700792074, + "loss_iou": 0.203125, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 787254356, + "step": 8119 + }, + { + "epoch": 0.7938991005084083, + "grad_norm": 3.1884858159473017, + "learning_rate": 5e-05, + "loss": 0.0469, + "num_input_tokens_seen": 787352272, + "step": 8120 + }, + { + "epoch": 0.7938991005084083, + "loss": 0.04559309780597687, + "loss_ce": 0.001853777328506112, + "loss_iou": 0.265625, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 787352272, + "step": 8120 + }, + { + "epoch": 0.7939968713335941, + "grad_norm": 8.828831507751145, + "learning_rate": 5e-05, + "loss": 0.0549, + "num_input_tokens_seen": 787448244, + "step": 8121 + }, + { + "epoch": 0.7939968713335941, + "loss": 0.05787850171327591, + "loss_ce": 0.006620415486395359, + "loss_iou": 0.1337890625, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 787448244, + "step": 8121 + }, + { + "epoch": 0.7940946421587798, + "grad_norm": 9.068154512135038, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 787546160, + "step": 8122 + }, + { + "epoch": 0.7940946421587798, + "loss": 0.07626298815011978, + "loss_ce": 0.006339586805552244, + "loss_iou": 0.1953125, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 787546160, + "step": 8122 + }, + { + "epoch": 0.7941924129839656, + "grad_norm": 7.954379201171918, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 787643196, + "step": 8123 + }, + { + "epoch": 0.7941924129839656, + "loss": 0.06847460567951202, + "loss_ce": 0.001721215550787747, + "loss_iou": 0.298828125, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 787643196, + "step": 8123 + }, + { + "epoch": 0.7942901838091514, + "grad_norm": 5.522079578919698, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 787740160, + "step": 8124 + }, + { + "epoch": 0.7942901838091514, + "loss": 0.10385990142822266, + "loss_ce": 0.004113194532692432, + "loss_iou": 0.330078125, + "loss_num": 0.02001953125, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 787740160, + "step": 8124 + }, + { + "epoch": 0.7943879546343371, + "grad_norm": 2.643497589437108, + "learning_rate": 5e-05, + "loss": 0.0968, + "num_input_tokens_seen": 787836164, + "step": 8125 + }, + { + "epoch": 0.7943879546343371, + "loss": 0.09059463441371918, + "loss_ce": 0.005053860135376453, + "loss_iou": 0.279296875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 787836164, + "step": 8125 + }, + { + "epoch": 0.7944857254595229, + "grad_norm": 2.913380024097633, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 787933724, + "step": 8126 + }, + { + "epoch": 0.7944857254595229, + "loss": 0.05336228758096695, + "loss_ce": 0.004511273931711912, + "loss_iou": 0.2236328125, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 787933724, + "step": 8126 + }, + { + "epoch": 0.7945834962847086, + "grad_norm": 9.518043752661816, + "learning_rate": 5e-05, + "loss": 0.1235, + "num_input_tokens_seen": 788030284, + "step": 8127 + }, + { + "epoch": 0.7945834962847086, + "loss": 0.13121578097343445, + "loss_ce": 0.007863723672926426, + "loss_iou": 0.2109375, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 788030284, + "step": 8127 + }, + { + "epoch": 0.7946812671098944, + "grad_norm": 3.1305814117613653, + "learning_rate": 5e-05, + "loss": 0.0466, + "num_input_tokens_seen": 788126500, + "step": 8128 + }, + { + "epoch": 0.7946812671098944, + "loss": 0.04052355885505676, + "loss_ce": 0.005725891795009375, + "loss_iou": 0.2314453125, + "loss_num": 0.0069580078125, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 788126500, + "step": 8128 + }, + { + "epoch": 0.7947790379350802, + "grad_norm": 9.688517592260386, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 788222572, + "step": 8129 + }, + { + "epoch": 0.7947790379350802, + "loss": 0.08171923458576202, + "loss_ce": 0.0064018527045845985, + "loss_iou": 0.28515625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 788222572, + "step": 8129 + }, + { + "epoch": 0.7948768087602659, + "grad_norm": 7.341052886959791, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 788319300, + "step": 8130 + }, + { + "epoch": 0.7948768087602659, + "loss": 0.07144397497177124, + "loss_ce": 0.007254065480083227, + "loss_iou": 0.197265625, + "loss_num": 0.01287841796875, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 788319300, + "step": 8130 + }, + { + "epoch": 0.7949745795854517, + "grad_norm": 3.139831202261417, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 788415748, + "step": 8131 + }, + { + "epoch": 0.7949745795854517, + "loss": 0.09510871767997742, + "loss_ce": 0.007988662458956242, + "loss_iou": 0.2158203125, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 788415748, + "step": 8131 + }, + { + "epoch": 0.7950723504106375, + "grad_norm": 14.51918923554553, + "learning_rate": 5e-05, + "loss": 0.0697, + "num_input_tokens_seen": 788512436, + "step": 8132 + }, + { + "epoch": 0.7950723504106375, + "loss": 0.05930766835808754, + "loss_ce": 0.006741135381162167, + "loss_iou": 0.279296875, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 788512436, + "step": 8132 + }, + { + "epoch": 0.7951701212358232, + "grad_norm": 8.740327760944746, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 788609780, + "step": 8133 + }, + { + "epoch": 0.7951701212358232, + "loss": 0.05282117426395416, + "loss_ce": 0.0047941384837031364, + "loss_iou": 0.341796875, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 788609780, + "step": 8133 + }, + { + "epoch": 0.795267892061009, + "grad_norm": 2.880609197545119, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 788706164, + "step": 8134 + }, + { + "epoch": 0.795267892061009, + "loss": 0.0640970766544342, + "loss_ce": 0.0050684502348303795, + "loss_iou": 0.322265625, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 788706164, + "step": 8134 + }, + { + "epoch": 0.7953656628861948, + "grad_norm": 4.014525515434603, + "learning_rate": 5e-05, + "loss": 0.0545, + "num_input_tokens_seen": 788802512, + "step": 8135 + }, + { + "epoch": 0.7953656628861948, + "loss": 0.04986017942428589, + "loss_ce": 0.0070440201088786125, + "loss_iou": 0.2490234375, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 788802512, + "step": 8135 + }, + { + "epoch": 0.7954634337113805, + "grad_norm": 5.168048095248133, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 788899260, + "step": 8136 + }, + { + "epoch": 0.7954634337113805, + "loss": 0.05145040154457092, + "loss_ce": 0.00518575357273221, + "loss_iou": 0.36328125, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 788899260, + "step": 8136 + }, + { + "epoch": 0.7955612045365663, + "grad_norm": 19.763013586214488, + "learning_rate": 5e-05, + "loss": 0.1149, + "num_input_tokens_seen": 788996660, + "step": 8137 + }, + { + "epoch": 0.7955612045365663, + "loss": 0.10421718657016754, + "loss_ce": 0.037688858807086945, + "loss_iou": 0.3828125, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 788996660, + "step": 8137 + }, + { + "epoch": 0.795658975361752, + "grad_norm": 106.92725157868175, + "learning_rate": 5e-05, + "loss": 2.5099, + "num_input_tokens_seen": 789093672, + "step": 8138 + }, + { + "epoch": 0.795658975361752, + "loss": 2.497570037841797, + "loss_ce": 2.425975799560547, + "loss_iou": 0.341796875, + "loss_num": 0.0142822265625, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 789093672, + "step": 8138 + }, + { + "epoch": 0.7957567461869378, + "grad_norm": 22.42222345059229, + "learning_rate": 5e-05, + "loss": 0.5775, + "num_input_tokens_seen": 789189852, + "step": 8139 + }, + { + "epoch": 0.7957567461869378, + "loss": 0.5412375330924988, + "loss_ce": 0.48838871717453003, + "loss_iou": 0.2314453125, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 789189852, + "step": 8139 + }, + { + "epoch": 0.7958545170121236, + "grad_norm": 14.841425670948373, + "learning_rate": 5e-05, + "loss": 0.293, + "num_input_tokens_seen": 789287300, + "step": 8140 + }, + { + "epoch": 0.7958545170121236, + "loss": 0.2775872051715851, + "loss_ce": 0.18423393368721008, + "loss_iou": 0.326171875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 789287300, + "step": 8140 + }, + { + "epoch": 0.7959522878373093, + "grad_norm": 11.93368897917031, + "learning_rate": 5e-05, + "loss": 0.1251, + "num_input_tokens_seen": 789384884, + "step": 8141 + }, + { + "epoch": 0.7959522878373093, + "loss": 0.13853245973587036, + "loss_ce": 0.029348192736506462, + "loss_iou": 0.30078125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 789384884, + "step": 8141 + }, + { + "epoch": 0.7960500586624951, + "grad_norm": 15.613320956850181, + "learning_rate": 5e-05, + "loss": 0.0873, + "num_input_tokens_seen": 789482120, + "step": 8142 + }, + { + "epoch": 0.7960500586624951, + "loss": 0.06549210101366043, + "loss_ce": 0.006143042352050543, + "loss_iou": 0.244140625, + "loss_num": 0.01190185546875, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 789482120, + "step": 8142 + }, + { + "epoch": 0.7961478294876809, + "grad_norm": 11.94235090937948, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 789579676, + "step": 8143 + }, + { + "epoch": 0.7961478294876809, + "loss": 0.06361959874629974, + "loss_ce": 0.00804708432406187, + "loss_iou": 0.404296875, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 789579676, + "step": 8143 + }, + { + "epoch": 0.7962456003128666, + "grad_norm": 8.707571442806568, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 789676492, + "step": 8144 + }, + { + "epoch": 0.7962456003128666, + "loss": 0.0968913584947586, + "loss_ce": 0.007673213258385658, + "loss_iou": 0.328125, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 789676492, + "step": 8144 + }, + { + "epoch": 0.7963433711380524, + "grad_norm": 5.346292535071331, + "learning_rate": 5e-05, + "loss": 0.0923, + "num_input_tokens_seen": 789772636, + "step": 8145 + }, + { + "epoch": 0.7963433711380524, + "loss": 0.10122309625148773, + "loss_ce": 0.006969555281102657, + "loss_iou": 0.1689453125, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 789772636, + "step": 8145 + }, + { + "epoch": 0.7964411419632381, + "grad_norm": 6.137367037281362, + "learning_rate": 5e-05, + "loss": 0.0982, + "num_input_tokens_seen": 789868968, + "step": 8146 + }, + { + "epoch": 0.7964411419632381, + "loss": 0.09092028439044952, + "loss_ce": 0.006584957242012024, + "loss_iou": 0.28125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 789868968, + "step": 8146 + }, + { + "epoch": 0.7965389127884239, + "grad_norm": 9.230644742531524, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 789966376, + "step": 8147 + }, + { + "epoch": 0.7965389127884239, + "loss": 0.06441238522529602, + "loss_ce": 0.004826811142265797, + "loss_iou": 0.357421875, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 789966376, + "step": 8147 + }, + { + "epoch": 0.7966366836136097, + "grad_norm": 8.844442586642023, + "learning_rate": 5e-05, + "loss": 0.0879, + "num_input_tokens_seen": 790063736, + "step": 8148 + }, + { + "epoch": 0.7966366836136097, + "loss": 0.06574246287345886, + "loss_ce": 0.006996128708124161, + "loss_iou": 0.41015625, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 790063736, + "step": 8148 + }, + { + "epoch": 0.7967344544387954, + "grad_norm": 7.645646870326204, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 790160816, + "step": 8149 + }, + { + "epoch": 0.7967344544387954, + "loss": 0.06673917174339294, + "loss_ce": 0.007001017685979605, + "loss_iou": 0.1982421875, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 790160816, + "step": 8149 + }, + { + "epoch": 0.7968322252639812, + "grad_norm": 4.295780924923547, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 790256916, + "step": 8150 + }, + { + "epoch": 0.7968322252639812, + "loss": 0.0719204992055893, + "loss_ce": 0.005140405613929033, + "loss_iou": 0.328125, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 790256916, + "step": 8150 + }, + { + "epoch": 0.796929996089167, + "grad_norm": 9.834478150328772, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 790353564, + "step": 8151 + }, + { + "epoch": 0.796929996089167, + "loss": 0.047902777791023254, + "loss_ce": 0.002416325267404318, + "loss_iou": 0.2060546875, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 790353564, + "step": 8151 + }, + { + "epoch": 0.7970277669143527, + "grad_norm": 47.40133967022419, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 790449644, + "step": 8152 + }, + { + "epoch": 0.7970277669143527, + "loss": 0.08572994917631149, + "loss_ce": 0.005895968526601791, + "loss_iou": 0.1875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 790449644, + "step": 8152 + }, + { + "epoch": 0.7971255377395385, + "grad_norm": 18.314757598208615, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 790546324, + "step": 8153 + }, + { + "epoch": 0.7971255377395385, + "loss": 0.07831677794456482, + "loss_ce": 0.006211366970092058, + "loss_iou": 0.3046875, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 790546324, + "step": 8153 + }, + { + "epoch": 0.7972233085647242, + "grad_norm": 7.204695234803529, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 790643208, + "step": 8154 + }, + { + "epoch": 0.7972233085647242, + "loss": 0.09552808851003647, + "loss_ce": 0.00618787482380867, + "loss_iou": 0.267578125, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 790643208, + "step": 8154 + }, + { + "epoch": 0.79732107938991, + "grad_norm": 9.005022744168206, + "learning_rate": 5e-05, + "loss": 0.0605, + "num_input_tokens_seen": 790740208, + "step": 8155 + }, + { + "epoch": 0.79732107938991, + "loss": 0.055924151092767715, + "loss_ce": 0.005295486655086279, + "loss_iou": 0.35546875, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 790740208, + "step": 8155 + }, + { + "epoch": 0.7974188502150958, + "grad_norm": 8.330930655159387, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 790837352, + "step": 8156 + }, + { + "epoch": 0.7974188502150958, + "loss": 0.0922909826040268, + "loss_ce": 0.007261382415890694, + "loss_iou": 0.236328125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 790837352, + "step": 8156 + }, + { + "epoch": 0.7975166210402815, + "grad_norm": 3.9492746726856294, + "learning_rate": 5e-05, + "loss": 0.0537, + "num_input_tokens_seen": 790934416, + "step": 8157 + }, + { + "epoch": 0.7975166210402815, + "loss": 0.056521400809288025, + "loss_ce": 0.005190832540392876, + "loss_iou": 0.279296875, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 790934416, + "step": 8157 + }, + { + "epoch": 0.7976143918654673, + "grad_norm": 15.69289999826526, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 791032268, + "step": 8158 + }, + { + "epoch": 0.7976143918654673, + "loss": 0.0876685231924057, + "loss_ce": 0.005789854098111391, + "loss_iou": 0.390625, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 791032268, + "step": 8158 + }, + { + "epoch": 0.7977121626906531, + "grad_norm": 23.422797913233293, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 791129304, + "step": 8159 + }, + { + "epoch": 0.7977121626906531, + "loss": 0.11105699837207794, + "loss_ce": 0.00802965834736824, + "loss_iou": 0.314453125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 791129304, + "step": 8159 + }, + { + "epoch": 0.7978099335158388, + "grad_norm": 4.438970248705814, + "learning_rate": 5e-05, + "loss": 0.0867, + "num_input_tokens_seen": 791225676, + "step": 8160 + }, + { + "epoch": 0.7978099335158388, + "loss": 0.12250305712223053, + "loss_ce": 0.006566773634403944, + "loss_iou": 0.158203125, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 791225676, + "step": 8160 + }, + { + "epoch": 0.7979077043410246, + "grad_norm": 6.518241980399765, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 791322960, + "step": 8161 + }, + { + "epoch": 0.7979077043410246, + "loss": 0.11545692384243011, + "loss_ce": 0.00951133668422699, + "loss_iou": 0.259765625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 791322960, + "step": 8161 + }, + { + "epoch": 0.7980054751662105, + "grad_norm": 5.241125426024021, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 791418944, + "step": 8162 + }, + { + "epoch": 0.7980054751662105, + "loss": 0.07811717689037323, + "loss_ce": 0.003371992614120245, + "loss_iou": 0.228515625, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 791418944, + "step": 8162 + }, + { + "epoch": 0.7981032459913961, + "grad_norm": 2.136033874889887, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 791515960, + "step": 8163 + }, + { + "epoch": 0.7981032459913961, + "loss": 0.05688918009400368, + "loss_ce": 0.0044218311086297035, + "loss_iou": 0.2060546875, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 791515960, + "step": 8163 + }, + { + "epoch": 0.798201016816582, + "grad_norm": 4.604687949032447, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 791612724, + "step": 8164 + }, + { + "epoch": 0.798201016816582, + "loss": 0.08055934309959412, + "loss_ce": 0.004891005344688892, + "loss_iou": 0.306640625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 791612724, + "step": 8164 + }, + { + "epoch": 0.7982987876417676, + "grad_norm": 3.372537911123067, + "learning_rate": 5e-05, + "loss": 0.0591, + "num_input_tokens_seen": 791709364, + "step": 8165 + }, + { + "epoch": 0.7982987876417676, + "loss": 0.055896587669849396, + "loss_ce": 0.00560361985117197, + "loss_iou": 0.298828125, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 791709364, + "step": 8165 + }, + { + "epoch": 0.7983965584669535, + "grad_norm": 7.039683480687251, + "learning_rate": 5e-05, + "loss": 0.0845, + "num_input_tokens_seen": 791806336, + "step": 8166 + }, + { + "epoch": 0.7983965584669535, + "loss": 0.10964588075876236, + "loss_ce": 0.005336800590157509, + "loss_iou": 0.2255859375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 791806336, + "step": 8166 + }, + { + "epoch": 0.7984943292921393, + "grad_norm": 4.0872607377206505, + "learning_rate": 5e-05, + "loss": 0.0443, + "num_input_tokens_seen": 791902968, + "step": 8167 + }, + { + "epoch": 0.7984943292921393, + "loss": 0.029267653822898865, + "loss_ce": 0.001992570236325264, + "loss_iou": 0.2734375, + "loss_num": 0.005462646484375, + "loss_xval": 0.0272216796875, + "num_input_tokens_seen": 791902968, + "step": 8167 + }, + { + "epoch": 0.798592100117325, + "grad_norm": 2.9022775839857733, + "learning_rate": 5e-05, + "loss": 0.104, + "num_input_tokens_seen": 791999716, + "step": 8168 + }, + { + "epoch": 0.798592100117325, + "loss": 0.1556248962879181, + "loss_ce": 0.005905656609684229, + "loss_iou": 0.220703125, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 791999716, + "step": 8168 + }, + { + "epoch": 0.7986898709425108, + "grad_norm": 3.5049675520679173, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 792095584, + "step": 8169 + }, + { + "epoch": 0.7986898709425108, + "loss": 0.07010824233293533, + "loss_ce": 0.0032480438239872456, + "loss_iou": 0.2197265625, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 792095584, + "step": 8169 + }, + { + "epoch": 0.7987876417676966, + "grad_norm": 4.963625964285404, + "learning_rate": 5e-05, + "loss": 0.0621, + "num_input_tokens_seen": 792192820, + "step": 8170 + }, + { + "epoch": 0.7987876417676966, + "loss": 0.07039551436901093, + "loss_ce": 0.007361455354839563, + "loss_iou": 0.251953125, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 792192820, + "step": 8170 + }, + { + "epoch": 0.7988854125928823, + "grad_norm": 2.356697148006749, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 792288788, + "step": 8171 + }, + { + "epoch": 0.7988854125928823, + "loss": 0.0599094033241272, + "loss_ce": 0.009753761813044548, + "loss_iou": 0.1455078125, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 792288788, + "step": 8171 + }, + { + "epoch": 0.7989831834180681, + "grad_norm": 3.9386795341029712, + "learning_rate": 5e-05, + "loss": 0.0511, + "num_input_tokens_seen": 792386020, + "step": 8172 + }, + { + "epoch": 0.7989831834180681, + "loss": 0.05446302890777588, + "loss_ce": 0.007458331063389778, + "loss_iou": 0.2109375, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 792386020, + "step": 8172 + }, + { + "epoch": 0.7990809542432538, + "grad_norm": 2.5807667002630006, + "learning_rate": 5e-05, + "loss": 0.051, + "num_input_tokens_seen": 792483092, + "step": 8173 + }, + { + "epoch": 0.7990809542432538, + "loss": 0.047326453030109406, + "loss_ce": 0.004784951917827129, + "loss_iou": 0.353515625, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 792483092, + "step": 8173 + }, + { + "epoch": 0.7991787250684396, + "grad_norm": 7.686812494203764, + "learning_rate": 5e-05, + "loss": 0.0645, + "num_input_tokens_seen": 792580536, + "step": 8174 + }, + { + "epoch": 0.7991787250684396, + "loss": 0.06080283597111702, + "loss_ce": 0.006283184979110956, + "loss_iou": 0.248046875, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 792580536, + "step": 8174 + }, + { + "epoch": 0.7992764958936254, + "grad_norm": 10.438332489706944, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 792677764, + "step": 8175 + }, + { + "epoch": 0.7992764958936254, + "loss": 0.07053760439157486, + "loss_ce": 0.004314463585615158, + "loss_iou": 0.322265625, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 792677764, + "step": 8175 + }, + { + "epoch": 0.7993742667188111, + "grad_norm": 11.387130082455059, + "learning_rate": 5e-05, + "loss": 0.1006, + "num_input_tokens_seen": 792774284, + "step": 8176 + }, + { + "epoch": 0.7993742667188111, + "loss": 0.09174489974975586, + "loss_ce": 0.006089688278734684, + "loss_iou": 0.189453125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 792774284, + "step": 8176 + }, + { + "epoch": 0.7994720375439969, + "grad_norm": 15.44936807291821, + "learning_rate": 5e-05, + "loss": 0.1028, + "num_input_tokens_seen": 792870428, + "step": 8177 + }, + { + "epoch": 0.7994720375439969, + "loss": 0.11495666205883026, + "loss_ce": 0.007565304636955261, + "loss_iou": 0.20703125, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 792870428, + "step": 8177 + }, + { + "epoch": 0.7995698083691827, + "grad_norm": 8.717804599149552, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 792966464, + "step": 8178 + }, + { + "epoch": 0.7995698083691827, + "loss": 0.06537792831659317, + "loss_ce": 0.00537274032831192, + "loss_iou": 0.2470703125, + "loss_num": 0.011962890625, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 792966464, + "step": 8178 + }, + { + "epoch": 0.7996675791943684, + "grad_norm": 3.5833680892203668, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 793064056, + "step": 8179 + }, + { + "epoch": 0.7996675791943684, + "loss": 0.05287080258131027, + "loss_ce": 0.006560377310961485, + "loss_iou": 0.25, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 793064056, + "step": 8179 + }, + { + "epoch": 0.7997653500195542, + "grad_norm": 8.018962481656324, + "learning_rate": 5e-05, + "loss": 0.073, + "num_input_tokens_seen": 793161004, + "step": 8180 + }, + { + "epoch": 0.7997653500195542, + "loss": 0.08051837980747223, + "loss_ce": 0.0065742842853069305, + "loss_iou": 0.283203125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 793161004, + "step": 8180 + }, + { + "epoch": 0.79986312084474, + "grad_norm": 6.63197489764545, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 793257356, + "step": 8181 + }, + { + "epoch": 0.79986312084474, + "loss": 0.0714978575706482, + "loss_ce": 0.005373894702643156, + "loss_iou": 0.1904296875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 793257356, + "step": 8181 + }, + { + "epoch": 0.7999608916699257, + "grad_norm": 10.941245163779822, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 793354128, + "step": 8182 + }, + { + "epoch": 0.7999608916699257, + "loss": 0.04756429046392441, + "loss_ce": 0.0033443185966461897, + "loss_iou": 0.2314453125, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 793354128, + "step": 8182 + }, + { + "epoch": 0.8000586624951115, + "grad_norm": 9.13728585456823, + "learning_rate": 5e-05, + "loss": 0.0763, + "num_input_tokens_seen": 793451196, + "step": 8183 + }, + { + "epoch": 0.8000586624951115, + "loss": 0.06045088544487953, + "loss_ce": 0.004680014681071043, + "loss_iou": 0.33203125, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 793451196, + "step": 8183 + }, + { + "epoch": 0.8001564333202972, + "grad_norm": 5.988052916524076, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 793548164, + "step": 8184 + }, + { + "epoch": 0.8001564333202972, + "loss": 0.07333871722221375, + "loss_ce": 0.003407685086131096, + "loss_iou": 0.29296875, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 793548164, + "step": 8184 + }, + { + "epoch": 0.800254204145483, + "grad_norm": 3.8138693868752127, + "learning_rate": 5e-05, + "loss": 0.0604, + "num_input_tokens_seen": 793644460, + "step": 8185 + }, + { + "epoch": 0.800254204145483, + "loss": 0.06297390908002853, + "loss_ce": 0.0037698056548833847, + "loss_iou": 0.291015625, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 793644460, + "step": 8185 + }, + { + "epoch": 0.8003519749706688, + "grad_norm": 12.88684946297071, + "learning_rate": 5e-05, + "loss": 0.0517, + "num_input_tokens_seen": 793740324, + "step": 8186 + }, + { + "epoch": 0.8003519749706688, + "loss": 0.07152526080608368, + "loss_ce": 0.00826232135295868, + "loss_iou": 0.2158203125, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 793740324, + "step": 8186 + }, + { + "epoch": 0.8004497457958545, + "grad_norm": 6.100128686157485, + "learning_rate": 5e-05, + "loss": 0.0689, + "num_input_tokens_seen": 793838096, + "step": 8187 + }, + { + "epoch": 0.8004497457958545, + "loss": 0.05450241267681122, + "loss_ce": 0.004728240892291069, + "loss_iou": 0.3671875, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 793838096, + "step": 8187 + }, + { + "epoch": 0.8005475166210403, + "grad_norm": 10.971260263392399, + "learning_rate": 5e-05, + "loss": 0.0598, + "num_input_tokens_seen": 793934844, + "step": 8188 + }, + { + "epoch": 0.8005475166210403, + "loss": 0.06175471097230911, + "loss_ce": 0.0060372427105903625, + "loss_iou": 0.302734375, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 793934844, + "step": 8188 + }, + { + "epoch": 0.8006452874462261, + "grad_norm": 11.947617277620596, + "learning_rate": 5e-05, + "loss": 0.0613, + "num_input_tokens_seen": 794032700, + "step": 8189 + }, + { + "epoch": 0.8006452874462261, + "loss": 0.07206244766712189, + "loss_ce": 0.008570626378059387, + "loss_iou": 0.3203125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 794032700, + "step": 8189 + }, + { + "epoch": 0.8007430582714118, + "grad_norm": 21.562099599418595, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 794129888, + "step": 8190 + }, + { + "epoch": 0.8007430582714118, + "loss": 0.04277081787586212, + "loss_ce": 0.0056614442728459835, + "loss_iou": 0.43359375, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 794129888, + "step": 8190 + }, + { + "epoch": 0.8008408290965976, + "grad_norm": 15.472308282375167, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 794227212, + "step": 8191 + }, + { + "epoch": 0.8008408290965976, + "loss": 0.0819777175784111, + "loss_ce": 0.004508845042437315, + "loss_iou": 0.326171875, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 794227212, + "step": 8191 + }, + { + "epoch": 0.8009385999217833, + "grad_norm": 6.365363527087297, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 794324160, + "step": 8192 + }, + { + "epoch": 0.8009385999217833, + "loss": 0.06312836706638336, + "loss_ce": 0.00813569501042366, + "loss_iou": 0.33203125, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 794324160, + "step": 8192 + }, + { + "epoch": 0.8010363707469691, + "grad_norm": 14.688704585903674, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 794420708, + "step": 8193 + }, + { + "epoch": 0.8010363707469691, + "loss": 0.055901359766721725, + "loss_ce": 0.002541372086852789, + "loss_iou": 0.2265625, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 794420708, + "step": 8193 + }, + { + "epoch": 0.8011341415721549, + "grad_norm": 5.804119226543099, + "learning_rate": 5e-05, + "loss": 0.1075, + "num_input_tokens_seen": 794516468, + "step": 8194 + }, + { + "epoch": 0.8011341415721549, + "loss": 0.05105142295360565, + "loss_ce": 0.007560059428215027, + "loss_iou": 0.2275390625, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 794516468, + "step": 8194 + }, + { + "epoch": 0.8012319123973406, + "grad_norm": 10.186688451478991, + "learning_rate": 5e-05, + "loss": 0.0517, + "num_input_tokens_seen": 794613424, + "step": 8195 + }, + { + "epoch": 0.8012319123973406, + "loss": 0.04664410278201103, + "loss_ce": 0.004735838156193495, + "loss_iou": 0.26953125, + "loss_num": 0.00836181640625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 794613424, + "step": 8195 + }, + { + "epoch": 0.8013296832225264, + "grad_norm": 21.701558436931325, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 794710176, + "step": 8196 + }, + { + "epoch": 0.8013296832225264, + "loss": 0.03301040083169937, + "loss_ce": 0.006051933858543634, + "loss_iou": 0.251953125, + "loss_num": 0.005401611328125, + "loss_xval": 0.0269775390625, + "num_input_tokens_seen": 794710176, + "step": 8196 + }, + { + "epoch": 0.8014274540477122, + "grad_norm": 8.673288564892463, + "learning_rate": 5e-05, + "loss": 0.0643, + "num_input_tokens_seen": 794807220, + "step": 8197 + }, + { + "epoch": 0.8014274540477122, + "loss": 0.0854964479804039, + "loss_ce": 0.007653737440705299, + "loss_iou": 0.228515625, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 794807220, + "step": 8197 + }, + { + "epoch": 0.8015252248728979, + "grad_norm": 6.347125836588514, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 794902868, + "step": 8198 + }, + { + "epoch": 0.8015252248728979, + "loss": 0.04816826432943344, + "loss_ce": 0.007381522096693516, + "loss_iou": 0.232421875, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 794902868, + "step": 8198 + }, + { + "epoch": 0.8016229956980837, + "grad_norm": 10.018202967699365, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 795000392, + "step": 8199 + }, + { + "epoch": 0.8016229956980837, + "loss": 0.06256669759750366, + "loss_ce": 0.005376751534640789, + "loss_iou": 0.244140625, + "loss_num": 0.011474609375, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 795000392, + "step": 8199 + }, + { + "epoch": 0.8017207665232694, + "grad_norm": 4.681604285456344, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 795096808, + "step": 8200 + }, + { + "epoch": 0.8017207665232694, + "loss": 0.06159829720854759, + "loss_ce": 0.0059647513553500175, + "loss_iou": 0.2412109375, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 795096808, + "step": 8200 + }, + { + "epoch": 0.8018185373484552, + "grad_norm": 9.864735568712675, + "learning_rate": 5e-05, + "loss": 0.0991, + "num_input_tokens_seen": 795192304, + "step": 8201 + }, + { + "epoch": 0.8018185373484552, + "loss": 0.14398834109306335, + "loss_ce": 0.007261951453983784, + "loss_iou": 0.2734375, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 795192304, + "step": 8201 + }, + { + "epoch": 0.801916308173641, + "grad_norm": 12.343060034235013, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 795287992, + "step": 8202 + }, + { + "epoch": 0.801916308173641, + "loss": 0.09797828644514084, + "loss_ce": 0.00782935693860054, + "loss_iou": 0.1611328125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 795287992, + "step": 8202 + }, + { + "epoch": 0.8020140789988267, + "grad_norm": 4.9303905302127315, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 795383688, + "step": 8203 + }, + { + "epoch": 0.8020140789988267, + "loss": 0.06467954069375992, + "loss_ce": 0.0023550186306238174, + "loss_iou": 0.2451171875, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 795383688, + "step": 8203 + }, + { + "epoch": 0.8021118498240125, + "grad_norm": 3.986045214299948, + "learning_rate": 5e-05, + "loss": 0.0587, + "num_input_tokens_seen": 795481100, + "step": 8204 + }, + { + "epoch": 0.8021118498240125, + "loss": 0.060139529407024384, + "loss_ce": 0.007389889098703861, + "loss_iou": 0.267578125, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 795481100, + "step": 8204 + }, + { + "epoch": 0.8022096206491983, + "grad_norm": 5.461110966003517, + "learning_rate": 5e-05, + "loss": 0.0685, + "num_input_tokens_seen": 795578008, + "step": 8205 + }, + { + "epoch": 0.8022096206491983, + "loss": 0.10557154566049576, + "loss_ce": 0.008086949586868286, + "loss_iou": 0.310546875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 795578008, + "step": 8205 + }, + { + "epoch": 0.802307391474384, + "grad_norm": 13.514925362021524, + "learning_rate": 5e-05, + "loss": 0.0889, + "num_input_tokens_seen": 795674868, + "step": 8206 + }, + { + "epoch": 0.802307391474384, + "loss": 0.08042410016059875, + "loss_ce": 0.006586814299225807, + "loss_iou": 0.2041015625, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 795674868, + "step": 8206 + }, + { + "epoch": 0.8024051622995698, + "grad_norm": 3.3303987200359617, + "learning_rate": 5e-05, + "loss": 0.0516, + "num_input_tokens_seen": 795772300, + "step": 8207 + }, + { + "epoch": 0.8024051622995698, + "loss": 0.059610575437545776, + "loss_ce": 0.0036031892523169518, + "loss_iou": 0.23046875, + "loss_num": 0.01123046875, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 795772300, + "step": 8207 + }, + { + "epoch": 0.8025029331247556, + "grad_norm": 8.30051972662747, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 795868180, + "step": 8208 + }, + { + "epoch": 0.8025029331247556, + "loss": 0.06786388903856277, + "loss_ce": 0.004402585327625275, + "loss_iou": 0.21875, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 795868180, + "step": 8208 + }, + { + "epoch": 0.8026007039499413, + "grad_norm": 6.691924309834364, + "learning_rate": 5e-05, + "loss": 0.0607, + "num_input_tokens_seen": 795963884, + "step": 8209 + }, + { + "epoch": 0.8026007039499413, + "loss": 0.0836087241768837, + "loss_ce": 0.006803610827773809, + "loss_iou": 0.259765625, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 795963884, + "step": 8209 + }, + { + "epoch": 0.8026984747751271, + "grad_norm": 4.50368776949373, + "learning_rate": 5e-05, + "loss": 0.0453, + "num_input_tokens_seen": 796061004, + "step": 8210 + }, + { + "epoch": 0.8026984747751271, + "loss": 0.054877400398254395, + "loss_ce": 0.010089038871228695, + "loss_iou": 0.23828125, + "loss_num": 0.00897216796875, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 796061004, + "step": 8210 + }, + { + "epoch": 0.8027962456003128, + "grad_norm": 7.92272013383565, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 796157740, + "step": 8211 + }, + { + "epoch": 0.8027962456003128, + "loss": 0.04776362329721451, + "loss_ce": 0.008563793264329433, + "loss_iou": 0.263671875, + "loss_num": 0.0078125, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 796157740, + "step": 8211 + }, + { + "epoch": 0.8028940164254986, + "grad_norm": 8.804493175603998, + "learning_rate": 5e-05, + "loss": 0.0655, + "num_input_tokens_seen": 796254620, + "step": 8212 + }, + { + "epoch": 0.8028940164254986, + "loss": 0.07476244121789932, + "loss_ce": 0.003000359982252121, + "loss_iou": 0.2578125, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 796254620, + "step": 8212 + }, + { + "epoch": 0.8029917872506844, + "grad_norm": 11.191781696901295, + "learning_rate": 5e-05, + "loss": 0.1055, + "num_input_tokens_seen": 796352128, + "step": 8213 + }, + { + "epoch": 0.8029917872506844, + "loss": 0.11758892238140106, + "loss_ce": 0.005726738832890987, + "loss_iou": 0.275390625, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 796352128, + "step": 8213 + }, + { + "epoch": 0.8030895580758701, + "grad_norm": 5.480718596769944, + "learning_rate": 5e-05, + "loss": 0.0877, + "num_input_tokens_seen": 796450004, + "step": 8214 + }, + { + "epoch": 0.8030895580758701, + "loss": 0.10393586754798889, + "loss_ce": 0.007683426141738892, + "loss_iou": 0.3671875, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 796450004, + "step": 8214 + }, + { + "epoch": 0.8031873289010559, + "grad_norm": 15.497972308335717, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 796546440, + "step": 8215 + }, + { + "epoch": 0.8031873289010559, + "loss": 0.09101934731006622, + "loss_ce": 0.005097104702144861, + "loss_iou": 0.287109375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 796546440, + "step": 8215 + }, + { + "epoch": 0.8032850997262417, + "grad_norm": 26.264974945233142, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 796643348, + "step": 8216 + }, + { + "epoch": 0.8032850997262417, + "loss": 0.06683839112520218, + "loss_ce": 0.0036975222174078226, + "loss_iou": 0.337890625, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 796643348, + "step": 8216 + }, + { + "epoch": 0.8033828705514274, + "grad_norm": 11.808120812080308, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 796738964, + "step": 8217 + }, + { + "epoch": 0.8033828705514274, + "loss": 0.07781513035297394, + "loss_ce": 0.007140240166336298, + "loss_iou": 0.1611328125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 796738964, + "step": 8217 + }, + { + "epoch": 0.8034806413766132, + "grad_norm": 5.383001342379487, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 796836588, + "step": 8218 + }, + { + "epoch": 0.8034806413766132, + "loss": 0.10459968447685242, + "loss_ce": 0.005951615050435066, + "loss_iou": 0.2255859375, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 796836588, + "step": 8218 + }, + { + "epoch": 0.8035784122017989, + "grad_norm": 12.129199226029186, + "learning_rate": 5e-05, + "loss": 0.0656, + "num_input_tokens_seen": 796933976, + "step": 8219 + }, + { + "epoch": 0.8035784122017989, + "loss": 0.06464444845914841, + "loss_ce": 0.005379309877753258, + "loss_iou": 0.220703125, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 796933976, + "step": 8219 + }, + { + "epoch": 0.8036761830269847, + "grad_norm": 35.93988810605957, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 797030436, + "step": 8220 + }, + { + "epoch": 0.8036761830269847, + "loss": 0.03942275419831276, + "loss_ce": 0.003152614925056696, + "loss_iou": 0.298828125, + "loss_num": 0.00726318359375, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 797030436, + "step": 8220 + }, + { + "epoch": 0.8037739538521705, + "grad_norm": 7.852153189052196, + "learning_rate": 5e-05, + "loss": 0.1047, + "num_input_tokens_seen": 797127524, + "step": 8221 + }, + { + "epoch": 0.8037739538521705, + "loss": 0.13222558796405792, + "loss_ce": 0.002983644139021635, + "loss_iou": 0.318359375, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 797127524, + "step": 8221 + }, + { + "epoch": 0.8038717246773562, + "grad_norm": 3.959386497085592, + "learning_rate": 5e-05, + "loss": 0.0856, + "num_input_tokens_seen": 797225236, + "step": 8222 + }, + { + "epoch": 0.8038717246773562, + "loss": 0.07531432807445526, + "loss_ce": 0.006054690573364496, + "loss_iou": 0.390625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 797225236, + "step": 8222 + }, + { + "epoch": 0.803969495502542, + "grad_norm": 17.789942823467495, + "learning_rate": 5e-05, + "loss": 0.0507, + "num_input_tokens_seen": 797323092, + "step": 8223 + }, + { + "epoch": 0.803969495502542, + "loss": 0.04440658912062645, + "loss_ce": 0.0037953220307826996, + "loss_iou": 0.228515625, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 797323092, + "step": 8223 + }, + { + "epoch": 0.8040672663277278, + "grad_norm": 5.683064931600806, + "learning_rate": 5e-05, + "loss": 0.0992, + "num_input_tokens_seen": 797418612, + "step": 8224 + }, + { + "epoch": 0.8040672663277278, + "loss": 0.1190694198012352, + "loss_ce": 0.008996326476335526, + "loss_iou": 0.1728515625, + "loss_num": 0.0220947265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 797418612, + "step": 8224 + }, + { + "epoch": 0.8041650371529135, + "grad_norm": 5.184398372089799, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 797515688, + "step": 8225 + }, + { + "epoch": 0.8041650371529135, + "loss": 0.054664451628923416, + "loss_ce": 0.0053327856585383415, + "loss_iou": 0.24609375, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 797515688, + "step": 8225 + }, + { + "epoch": 0.8042628079780993, + "grad_norm": 12.086198203360523, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 797613504, + "step": 8226 + }, + { + "epoch": 0.8042628079780993, + "loss": 0.053894609212875366, + "loss_ce": 0.0018011077772825956, + "loss_iou": 0.3359375, + "loss_num": 0.01043701171875, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 797613504, + "step": 8226 + }, + { + "epoch": 0.8043605788032852, + "grad_norm": 4.809106742024164, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 797711656, + "step": 8227 + }, + { + "epoch": 0.8043605788032852, + "loss": 0.060113511979579926, + "loss_ce": 0.006295762024819851, + "loss_iou": 0.3203125, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 797711656, + "step": 8227 + }, + { + "epoch": 0.8044583496284708, + "grad_norm": 4.683492224060584, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 797808884, + "step": 8228 + }, + { + "epoch": 0.8044583496284708, + "loss": 0.08092399686574936, + "loss_ce": 0.006964649073779583, + "loss_iou": 0.2890625, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 797808884, + "step": 8228 + }, + { + "epoch": 0.8045561204536567, + "grad_norm": 5.908083586363141, + "learning_rate": 5e-05, + "loss": 0.0405, + "num_input_tokens_seen": 797905576, + "step": 8229 + }, + { + "epoch": 0.8045561204536567, + "loss": 0.045774172991514206, + "loss_ce": 0.005437563639134169, + "loss_iou": 0.28125, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 797905576, + "step": 8229 + }, + { + "epoch": 0.8046538912788423, + "grad_norm": 8.55422267729804, + "learning_rate": 5e-05, + "loss": 0.1241, + "num_input_tokens_seen": 798002436, + "step": 8230 + }, + { + "epoch": 0.8046538912788423, + "loss": 0.09709348529577255, + "loss_ce": 0.003663916140794754, + "loss_iou": 0.279296875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 798002436, + "step": 8230 + }, + { + "epoch": 0.8047516621040282, + "grad_norm": 5.941340638383959, + "learning_rate": 5e-05, + "loss": 0.0633, + "num_input_tokens_seen": 798099928, + "step": 8231 + }, + { + "epoch": 0.8047516621040282, + "loss": 0.0889066830277443, + "loss_ce": 0.0035261285956948996, + "loss_iou": 0.263671875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 798099928, + "step": 8231 + }, + { + "epoch": 0.804849432929214, + "grad_norm": 2.8298865648086857, + "learning_rate": 5e-05, + "loss": 0.0848, + "num_input_tokens_seen": 798197736, + "step": 8232 + }, + { + "epoch": 0.804849432929214, + "loss": 0.04316607862710953, + "loss_ce": 0.0030087619088590145, + "loss_iou": 0.205078125, + "loss_num": 0.008056640625, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 798197736, + "step": 8232 + }, + { + "epoch": 0.8049472037543997, + "grad_norm": 9.05031035790003, + "learning_rate": 5e-05, + "loss": 0.0556, + "num_input_tokens_seen": 798294220, + "step": 8233 + }, + { + "epoch": 0.8049472037543997, + "loss": 0.04815630242228508, + "loss_ce": 0.007018608041107655, + "loss_iou": 0.349609375, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 798294220, + "step": 8233 + }, + { + "epoch": 0.8050449745795855, + "grad_norm": 44.95720337918335, + "learning_rate": 5e-05, + "loss": 0.0697, + "num_input_tokens_seen": 798392696, + "step": 8234 + }, + { + "epoch": 0.8050449745795855, + "loss": 0.06030607223510742, + "loss_ce": 0.004794602748006582, + "loss_iou": 0.2734375, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 798392696, + "step": 8234 + }, + { + "epoch": 0.8051427454047713, + "grad_norm": 25.706794909772455, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 798489640, + "step": 8235 + }, + { + "epoch": 0.8051427454047713, + "loss": 0.10351491719484329, + "loss_ce": 0.010264638811349869, + "loss_iou": 0.345703125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 798489640, + "step": 8235 + }, + { + "epoch": 0.805240516229957, + "grad_norm": 2.2987869428446555, + "learning_rate": 5e-05, + "loss": 0.0596, + "num_input_tokens_seen": 798585788, + "step": 8236 + }, + { + "epoch": 0.805240516229957, + "loss": 0.031566936522722244, + "loss_ce": 0.007206279318779707, + "loss_iou": 0.265625, + "loss_num": 0.004852294921875, + "loss_xval": 0.0244140625, + "num_input_tokens_seen": 798585788, + "step": 8236 + }, + { + "epoch": 0.8053382870551428, + "grad_norm": 9.897422950818065, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 798683108, + "step": 8237 + }, + { + "epoch": 0.8053382870551428, + "loss": 0.058767009526491165, + "loss_ce": 0.009160686284303665, + "loss_iou": 0.259765625, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 798683108, + "step": 8237 + }, + { + "epoch": 0.8054360578803285, + "grad_norm": 3.245842431137891, + "learning_rate": 5e-05, + "loss": 0.0675, + "num_input_tokens_seen": 798781048, + "step": 8238 + }, + { + "epoch": 0.8054360578803285, + "loss": 0.0469098761677742, + "loss_ce": 0.0030866344459354877, + "loss_iou": 0.3046875, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 798781048, + "step": 8238 + }, + { + "epoch": 0.8055338287055143, + "grad_norm": 13.828791582535505, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 798878324, + "step": 8239 + }, + { + "epoch": 0.8055338287055143, + "loss": 0.07131002843379974, + "loss_ce": 0.005754453595727682, + "loss_iou": 0.28125, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 798878324, + "step": 8239 + }, + { + "epoch": 0.8056315995307001, + "grad_norm": 14.671935741629303, + "learning_rate": 5e-05, + "loss": 0.0459, + "num_input_tokens_seen": 798975624, + "step": 8240 + }, + { + "epoch": 0.8056315995307001, + "loss": 0.04000406712293625, + "loss_ce": 0.005976965185254812, + "loss_iou": 0.322265625, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 798975624, + "step": 8240 + }, + { + "epoch": 0.8057293703558858, + "grad_norm": 4.218757528330226, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 799073072, + "step": 8241 + }, + { + "epoch": 0.8057293703558858, + "loss": 0.07913894206285477, + "loss_ce": 0.004088589455932379, + "loss_iou": 0.25390625, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 799073072, + "step": 8241 + }, + { + "epoch": 0.8058271411810716, + "grad_norm": 3.203076160661014, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 799171028, + "step": 8242 + }, + { + "epoch": 0.8058271411810716, + "loss": 0.05395781993865967, + "loss_ce": 0.004984736442565918, + "loss_iou": 0.34765625, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 799171028, + "step": 8242 + }, + { + "epoch": 0.8059249120062574, + "grad_norm": 7.2779319483562315, + "learning_rate": 5e-05, + "loss": 0.0681, + "num_input_tokens_seen": 799268136, + "step": 8243 + }, + { + "epoch": 0.8059249120062574, + "loss": 0.07028092443943024, + "loss_ce": 0.005797282326966524, + "loss_iou": 0.2890625, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 799268136, + "step": 8243 + }, + { + "epoch": 0.8060226828314431, + "grad_norm": 13.079924103752507, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 799364452, + "step": 8244 + }, + { + "epoch": 0.8060226828314431, + "loss": 0.10068723559379578, + "loss_ce": 0.005121446214616299, + "loss_iou": 0.2421875, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 799364452, + "step": 8244 + }, + { + "epoch": 0.8061204536566289, + "grad_norm": 6.830348322678727, + "learning_rate": 5e-05, + "loss": 0.0911, + "num_input_tokens_seen": 799461644, + "step": 8245 + }, + { + "epoch": 0.8061204536566289, + "loss": 0.09204255044460297, + "loss_ce": 0.006471261847764254, + "loss_iou": 0.26953125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 799461644, + "step": 8245 + }, + { + "epoch": 0.8062182244818146, + "grad_norm": 10.379575630256724, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 799558928, + "step": 8246 + }, + { + "epoch": 0.8062182244818146, + "loss": 0.09184344857931137, + "loss_ce": 0.004679517820477486, + "loss_iou": 0.26953125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 799558928, + "step": 8246 + }, + { + "epoch": 0.8063159953070004, + "grad_norm": 9.076870993279494, + "learning_rate": 5e-05, + "loss": 0.0937, + "num_input_tokens_seen": 799656120, + "step": 8247 + }, + { + "epoch": 0.8063159953070004, + "loss": 0.08321371674537659, + "loss_ce": 0.004920871462672949, + "loss_iou": 0.2431640625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 799656120, + "step": 8247 + }, + { + "epoch": 0.8064137661321862, + "grad_norm": 2.782138786175322, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 799753076, + "step": 8248 + }, + { + "epoch": 0.8064137661321862, + "loss": 0.09479021281003952, + "loss_ce": 0.009783494286239147, + "loss_iou": 0.248046875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 799753076, + "step": 8248 + }, + { + "epoch": 0.8065115369573719, + "grad_norm": 39.312650647951436, + "learning_rate": 5e-05, + "loss": 0.0595, + "num_input_tokens_seen": 799849496, + "step": 8249 + }, + { + "epoch": 0.8065115369573719, + "loss": 0.05582772195339203, + "loss_ce": 0.002818689215928316, + "loss_iou": 0.1923828125, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 799849496, + "step": 8249 + }, + { + "epoch": 0.8066093077825577, + "grad_norm": 41.31067702660251, + "learning_rate": 5e-05, + "loss": 0.0675, + "num_input_tokens_seen": 799946484, + "step": 8250 + }, + { + "epoch": 0.8066093077825577, + "eval_seeclick_CIoU": 0.5316634327173233, + "eval_seeclick_GIoU": 0.536701038479805, + "eval_seeclick_IoU": 0.5757040232419968, + "eval_seeclick_MAE_all": 0.07175843603909016, + "eval_seeclick_MAE_h": 0.0435309037566185, + "eval_seeclick_MAE_w": 0.09004411846399307, + "eval_seeclick_MAE_x": 0.11154357716441154, + "eval_seeclick_MAE_y": 0.041915129870176315, + "eval_seeclick_NUM_probability": 0.9999978244304657, + "eval_seeclick_inside_bbox": 0.7698863744735718, + "eval_seeclick_loss": 0.2555573582649231, + "eval_seeclick_loss_ce": 0.010498513467609882, + "eval_seeclick_loss_iou": 0.40478515625, + "eval_seeclick_loss_num": 0.048892974853515625, + "eval_seeclick_loss_xval": 0.24432373046875, + "eval_seeclick_runtime": 76.2089, + "eval_seeclick_samples_per_second": 0.564, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 799946484, + "step": 8250 + }, + { + "epoch": 0.8066093077825577, + "eval_icons_CIoU": 0.7180690169334412, + "eval_icons_GIoU": 0.724346250295639, + "eval_icons_IoU": 0.7415605187416077, + "eval_icons_MAE_all": 0.04679430183023214, + "eval_icons_MAE_h": 0.05034653656184673, + "eval_icons_MAE_w": 0.04449541121721268, + "eval_icons_MAE_x": 0.04645376838743687, + "eval_icons_MAE_y": 0.04588147811591625, + "eval_icons_NUM_probability": 0.9999982118606567, + "eval_icons_inside_bbox": 0.8472222089767456, + "eval_icons_loss": 0.16236664354801178, + "eval_icons_loss_ce": 6.223845957720187e-05, + "eval_icons_loss_iou": 0.44610595703125, + "eval_icons_loss_num": 0.0363922119140625, + "eval_icons_loss_xval": 0.1819610595703125, + "eval_icons_runtime": 92.0594, + "eval_icons_samples_per_second": 0.543, + "eval_icons_steps_per_second": 0.022, + "num_input_tokens_seen": 799946484, + "step": 8250 + }, + { + "epoch": 0.8066093077825577, + "eval_screenspot_CIoU": 0.30002449452877045, + "eval_screenspot_GIoU": 0.27396703387300175, + "eval_screenspot_IoU": 0.40441884597142536, + "eval_screenspot_MAE_all": 0.17873905102411905, + "eval_screenspot_MAE_h": 0.13125968227783838, + "eval_screenspot_MAE_w": 0.241275355219841, + "eval_screenspot_MAE_x": 0.21707201997439066, + "eval_screenspot_MAE_y": 0.12534915407498678, + "eval_screenspot_NUM_probability": 0.9999867876370748, + "eval_screenspot_inside_bbox": 0.6166666746139526, + "eval_screenspot_loss": 0.6185488104820251, + "eval_screenspot_loss_ce": 0.02460755594074726, + "eval_screenspot_loss_iou": 0.373046875, + "eval_screenspot_loss_num": 0.11938985188802083, + "eval_screenspot_loss_xval": 0.5965779622395834, + "eval_screenspot_runtime": 157.3132, + "eval_screenspot_samples_per_second": 0.566, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 799946484, + "step": 8250 + }, + { + "epoch": 0.8066093077825577, + "eval_compot_CIoU": 0.5223739594221115, + "eval_compot_GIoU": 0.5018188208341599, + "eval_compot_IoU": 0.5793587267398834, + "eval_compot_MAE_all": 0.0834507904946804, + "eval_compot_MAE_h": 0.08264481648802757, + "eval_compot_MAE_w": 0.08598303981125355, + "eval_compot_MAE_x": 0.07860999554395676, + "eval_compot_MAE_y": 0.08656531944870949, + "eval_compot_NUM_probability": 0.9999943673610687, + "eval_compot_inside_bbox": 0.7795138955116272, + "eval_compot_loss": 0.2930273413658142, + "eval_compot_loss_ce": 0.02991802617907524, + "eval_compot_loss_iou": 0.43603515625, + "eval_compot_loss_num": 0.05013275146484375, + "eval_compot_loss_xval": 0.2505950927734375, + "eval_compot_runtime": 98.5035, + "eval_compot_samples_per_second": 0.508, + "eval_compot_steps_per_second": 0.02, + "num_input_tokens_seen": 799946484, + "step": 8250 + }, + { + "epoch": 0.8066093077825577, + "loss": 0.2596808075904846, + "loss_ce": 0.03543763607740402, + "loss_iou": 0.416015625, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 799946484, + "step": 8250 + }, + { + "epoch": 0.8067070786077435, + "grad_norm": 11.101312378178795, + "learning_rate": 5e-05, + "loss": 0.0596, + "num_input_tokens_seen": 800043796, + "step": 8251 + }, + { + "epoch": 0.8067070786077435, + "loss": 0.0641724243760109, + "loss_ce": 0.0020691496320068836, + "loss_iou": 0.40234375, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 800043796, + "step": 8251 + }, + { + "epoch": 0.8068048494329292, + "grad_norm": 10.265466875500964, + "learning_rate": 5e-05, + "loss": 0.0991, + "num_input_tokens_seen": 800140076, + "step": 8252 + }, + { + "epoch": 0.8068048494329292, + "loss": 0.12405417859554291, + "loss_ce": 0.0048525105230510235, + "loss_iou": 0.28515625, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 800140076, + "step": 8252 + }, + { + "epoch": 0.806902620258115, + "grad_norm": 5.437313803341409, + "learning_rate": 5e-05, + "loss": 0.1029, + "num_input_tokens_seen": 800237464, + "step": 8253 + }, + { + "epoch": 0.806902620258115, + "loss": 0.0819542407989502, + "loss_ce": 0.004126791842281818, + "loss_iou": 0.271484375, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 800237464, + "step": 8253 + }, + { + "epoch": 0.8070003910833008, + "grad_norm": 5.325713353946833, + "learning_rate": 5e-05, + "loss": 0.0643, + "num_input_tokens_seen": 800334968, + "step": 8254 + }, + { + "epoch": 0.8070003910833008, + "loss": 0.08209014683961868, + "loss_ce": 0.003934626467525959, + "loss_iou": 0.296875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 800334968, + "step": 8254 + }, + { + "epoch": 0.8070981619084865, + "grad_norm": 12.22632372837052, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 800432248, + "step": 8255 + }, + { + "epoch": 0.8070981619084865, + "loss": 0.0980978012084961, + "loss_ce": 0.008589750155806541, + "loss_iou": 0.29296875, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 800432248, + "step": 8255 + }, + { + "epoch": 0.8071959327336723, + "grad_norm": 36.14856439794725, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 800528628, + "step": 8256 + }, + { + "epoch": 0.8071959327336723, + "loss": 0.10055416077375412, + "loss_ce": 0.00727718323469162, + "loss_iou": 0.30078125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 800528628, + "step": 8256 + }, + { + "epoch": 0.807293703558858, + "grad_norm": 4.542886310387341, + "learning_rate": 5e-05, + "loss": 0.1183, + "num_input_tokens_seen": 800625888, + "step": 8257 + }, + { + "epoch": 0.807293703558858, + "loss": 0.1216355562210083, + "loss_ce": 0.010490532033145428, + "loss_iou": 0.34375, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 800625888, + "step": 8257 + }, + { + "epoch": 0.8073914743840438, + "grad_norm": 8.331167589669167, + "learning_rate": 5e-05, + "loss": 0.0812, + "num_input_tokens_seen": 800723988, + "step": 8258 + }, + { + "epoch": 0.8073914743840438, + "loss": 0.07319652289152145, + "loss_ce": 0.009498707950115204, + "loss_iou": 0.37109375, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 800723988, + "step": 8258 + }, + { + "epoch": 0.8074892452092296, + "grad_norm": 14.645969960726974, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 800819912, + "step": 8259 + }, + { + "epoch": 0.8074892452092296, + "loss": 0.08891801536083221, + "loss_ce": 0.0015423714648932219, + "loss_iou": 0.26171875, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 800819912, + "step": 8259 + }, + { + "epoch": 0.8075870160344153, + "grad_norm": 5.254777477450366, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 800916644, + "step": 8260 + }, + { + "epoch": 0.8075870160344153, + "loss": 0.08817218244075775, + "loss_ce": 0.008467897772789001, + "loss_iou": 0.201171875, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 800916644, + "step": 8260 + }, + { + "epoch": 0.8076847868596011, + "grad_norm": 10.958011774373517, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 801013664, + "step": 8261 + }, + { + "epoch": 0.8076847868596011, + "loss": 0.08438087999820709, + "loss_ce": 0.004630822688341141, + "loss_iou": 0.19140625, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 801013664, + "step": 8261 + }, + { + "epoch": 0.8077825576847869, + "grad_norm": 13.872662169840876, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 801110980, + "step": 8262 + }, + { + "epoch": 0.8077825576847869, + "loss": 0.0683048665523529, + "loss_ce": 0.0038898889906704426, + "loss_iou": 0.205078125, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 801110980, + "step": 8262 + }, + { + "epoch": 0.8078803285099726, + "grad_norm": 9.89916909369041, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 801209148, + "step": 8263 + }, + { + "epoch": 0.8078803285099726, + "loss": 0.07624613493680954, + "loss_ce": 0.006681318394839764, + "loss_iou": 0.30078125, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 801209148, + "step": 8263 + }, + { + "epoch": 0.8079780993351584, + "grad_norm": 10.793993725023267, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 801305988, + "step": 8264 + }, + { + "epoch": 0.8079780993351584, + "loss": 0.052953265607357025, + "loss_ce": 0.005849387031048536, + "loss_iou": 0.31640625, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 801305988, + "step": 8264 + }, + { + "epoch": 0.8080758701603441, + "grad_norm": 6.648050986571348, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 801403296, + "step": 8265 + }, + { + "epoch": 0.8080758701603441, + "loss": 0.06150006502866745, + "loss_ce": 0.004844184033572674, + "loss_iou": 0.3359375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 801403296, + "step": 8265 + }, + { + "epoch": 0.8081736409855299, + "grad_norm": 6.525648981139835, + "learning_rate": 5e-05, + "loss": 0.1078, + "num_input_tokens_seen": 801500356, + "step": 8266 + }, + { + "epoch": 0.8081736409855299, + "loss": 0.10744060575962067, + "loss_ce": 0.009585988707840443, + "loss_iou": 0.35546875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 801500356, + "step": 8266 + }, + { + "epoch": 0.8082714118107157, + "grad_norm": 7.712339088492322, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 801597180, + "step": 8267 + }, + { + "epoch": 0.8082714118107157, + "loss": 0.0776069387793541, + "loss_ce": 0.008080268278717995, + "loss_iou": 0.283203125, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 801597180, + "step": 8267 + }, + { + "epoch": 0.8083691826359014, + "grad_norm": 11.016655004332094, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 801693100, + "step": 8268 + }, + { + "epoch": 0.8083691826359014, + "loss": 0.05050654709339142, + "loss_ce": 0.005462600849568844, + "loss_iou": 0.244140625, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 801693100, + "step": 8268 + }, + { + "epoch": 0.8084669534610872, + "grad_norm": 3.5890510333117236, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 801789408, + "step": 8269 + }, + { + "epoch": 0.8084669534610872, + "loss": 0.06147473677992821, + "loss_ce": 0.003277715528383851, + "loss_iou": 0.1796875, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 801789408, + "step": 8269 + }, + { + "epoch": 0.808564724286273, + "grad_norm": 3.68100232975018, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 801886168, + "step": 8270 + }, + { + "epoch": 0.808564724286273, + "loss": 0.07845553755760193, + "loss_ce": 0.004938691854476929, + "loss_iou": 0.322265625, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 801886168, + "step": 8270 + }, + { + "epoch": 0.8086624951114587, + "grad_norm": 3.738504801232138, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 801982536, + "step": 8271 + }, + { + "epoch": 0.8086624951114587, + "loss": 0.08914932608604431, + "loss_ce": 0.006233065389096737, + "loss_iou": 0.3359375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 801982536, + "step": 8271 + }, + { + "epoch": 0.8087602659366445, + "grad_norm": 1.7936530777767652, + "learning_rate": 5e-05, + "loss": 0.0576, + "num_input_tokens_seen": 802078704, + "step": 8272 + }, + { + "epoch": 0.8087602659366445, + "loss": 0.06750349700450897, + "loss_ce": 0.003630211343988776, + "loss_iou": 0.3203125, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 802078704, + "step": 8272 + }, + { + "epoch": 0.8088580367618303, + "grad_norm": 8.04962651752485, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 802175872, + "step": 8273 + }, + { + "epoch": 0.8088580367618303, + "loss": 0.059152752161026, + "loss_ce": 0.002252728445455432, + "loss_iou": 0.25, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 802175872, + "step": 8273 + }, + { + "epoch": 0.808955807587016, + "grad_norm": 5.347354962152825, + "learning_rate": 5e-05, + "loss": 0.0576, + "num_input_tokens_seen": 802272520, + "step": 8274 + }, + { + "epoch": 0.808955807587016, + "loss": 0.05395134538412094, + "loss_ce": 0.004627309273928404, + "loss_iou": 0.259765625, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 802272520, + "step": 8274 + }, + { + "epoch": 0.8090535784122018, + "grad_norm": 4.1735451390257206, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 802369796, + "step": 8275 + }, + { + "epoch": 0.8090535784122018, + "loss": 0.048423249274492264, + "loss_ce": 0.004325348883867264, + "loss_iou": 0.3203125, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 802369796, + "step": 8275 + }, + { + "epoch": 0.8091513492373875, + "grad_norm": 10.5575358078025, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 802466372, + "step": 8276 + }, + { + "epoch": 0.8091513492373875, + "loss": 0.07077513635158539, + "loss_ce": 0.005360702518373728, + "loss_iou": 0.302734375, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 802466372, + "step": 8276 + }, + { + "epoch": 0.8092491200625733, + "grad_norm": 13.476579701381914, + "learning_rate": 5e-05, + "loss": 0.0576, + "num_input_tokens_seen": 802563052, + "step": 8277 + }, + { + "epoch": 0.8092491200625733, + "loss": 0.03533058613538742, + "loss_ce": 0.004687122069299221, + "loss_iou": 0.1904296875, + "loss_num": 0.006134033203125, + "loss_xval": 0.0306396484375, + "num_input_tokens_seen": 802563052, + "step": 8277 + }, + { + "epoch": 0.8093468908877591, + "grad_norm": 3.4371220788789842, + "learning_rate": 5e-05, + "loss": 0.0435, + "num_input_tokens_seen": 802659568, + "step": 8278 + }, + { + "epoch": 0.8093468908877591, + "loss": 0.04806262254714966, + "loss_ce": 0.004502593539655209, + "loss_iou": 0.25390625, + "loss_num": 0.00872802734375, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 802659568, + "step": 8278 + }, + { + "epoch": 0.8094446617129448, + "grad_norm": 2.681334036278172, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 802756936, + "step": 8279 + }, + { + "epoch": 0.8094446617129448, + "loss": 0.06714527308940887, + "loss_ce": 0.007689407095313072, + "loss_iou": 0.28125, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 802756936, + "step": 8279 + }, + { + "epoch": 0.8095424325381306, + "grad_norm": 8.144132821326151, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 802853788, + "step": 8280 + }, + { + "epoch": 0.8095424325381306, + "loss": 0.08086098730564117, + "loss_ce": 0.0035141899716109037, + "loss_iou": 0.263671875, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 802853788, + "step": 8280 + }, + { + "epoch": 0.8096402033633164, + "grad_norm": 5.3767023758457055, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 802951216, + "step": 8281 + }, + { + "epoch": 0.8096402033633164, + "loss": 0.0937291607260704, + "loss_ce": 0.006143711041659117, + "loss_iou": 0.404296875, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 802951216, + "step": 8281 + }, + { + "epoch": 0.8097379741885021, + "grad_norm": 8.328528052105959, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 803048284, + "step": 8282 + }, + { + "epoch": 0.8097379741885021, + "loss": 0.07926303148269653, + "loss_ce": 0.011254610493779182, + "loss_iou": 0.26171875, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 803048284, + "step": 8282 + }, + { + "epoch": 0.8098357450136879, + "grad_norm": 11.327203626487991, + "learning_rate": 5e-05, + "loss": 0.0303, + "num_input_tokens_seen": 803144208, + "step": 8283 + }, + { + "epoch": 0.8098357450136879, + "loss": 0.0367269366979599, + "loss_ce": 0.0029439746867865324, + "loss_iou": 0.330078125, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 803144208, + "step": 8283 + }, + { + "epoch": 0.8099335158388736, + "grad_norm": 6.622785288314443, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 803241252, + "step": 8284 + }, + { + "epoch": 0.8099335158388736, + "loss": 0.03680982440710068, + "loss_ce": 0.0022181514650583267, + "loss_iou": 0.2177734375, + "loss_num": 0.00689697265625, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 803241252, + "step": 8284 + }, + { + "epoch": 0.8100312866640594, + "grad_norm": 8.502539289111947, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 803336996, + "step": 8285 + }, + { + "epoch": 0.8100312866640594, + "loss": 0.02936534211039543, + "loss_ce": 0.0017011603340506554, + "loss_iou": 0.173828125, + "loss_num": 0.005523681640625, + "loss_xval": 0.0277099609375, + "num_input_tokens_seen": 803336996, + "step": 8285 + }, + { + "epoch": 0.8101290574892452, + "grad_norm": 5.181193274622391, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 803433740, + "step": 8286 + }, + { + "epoch": 0.8101290574892452, + "loss": 0.044402845203876495, + "loss_ce": 0.005759961903095245, + "loss_iou": 0.236328125, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 803433740, + "step": 8286 + }, + { + "epoch": 0.8102268283144309, + "grad_norm": 3.0749032612132163, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 803531828, + "step": 8287 + }, + { + "epoch": 0.8102268283144309, + "loss": 0.07937891781330109, + "loss_ce": 0.003340555587783456, + "loss_iou": 0.29296875, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 803531828, + "step": 8287 + }, + { + "epoch": 0.8103245991396167, + "grad_norm": 4.943562510251865, + "learning_rate": 5e-05, + "loss": 0.1013, + "num_input_tokens_seen": 803629372, + "step": 8288 + }, + { + "epoch": 0.8103245991396167, + "loss": 0.14552703499794006, + "loss_ce": 0.011097095906734467, + "loss_iou": 0.291015625, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 803629372, + "step": 8288 + }, + { + "epoch": 0.8104223699648025, + "grad_norm": 2.9752115380578417, + "learning_rate": 5e-05, + "loss": 0.0865, + "num_input_tokens_seen": 803726980, + "step": 8289 + }, + { + "epoch": 0.8104223699648025, + "loss": 0.07755329459905624, + "loss_ce": 0.002975964453071356, + "loss_iou": 0.25390625, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 803726980, + "step": 8289 + }, + { + "epoch": 0.8105201407899882, + "grad_norm": 9.835504640075408, + "learning_rate": 5e-05, + "loss": 0.0871, + "num_input_tokens_seen": 803823320, + "step": 8290 + }, + { + "epoch": 0.8105201407899882, + "loss": 0.0698341578245163, + "loss_ce": 0.0029396284371614456, + "loss_iou": 0.322265625, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 803823320, + "step": 8290 + }, + { + "epoch": 0.810617911615174, + "grad_norm": 3.250087652288247, + "learning_rate": 5e-05, + "loss": 0.0685, + "num_input_tokens_seen": 803921336, + "step": 8291 + }, + { + "epoch": 0.810617911615174, + "loss": 0.09919621050357819, + "loss_ce": 0.005858196411281824, + "loss_iou": 0.3046875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 803921336, + "step": 8291 + }, + { + "epoch": 0.8107156824403597, + "grad_norm": 43.74794268515069, + "learning_rate": 5e-05, + "loss": 0.0643, + "num_input_tokens_seen": 804017264, + "step": 8292 + }, + { + "epoch": 0.8107156824403597, + "loss": 0.06321221590042114, + "loss_ce": 0.007273491472005844, + "loss_iou": 0.265625, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 804017264, + "step": 8292 + }, + { + "epoch": 0.8108134532655455, + "grad_norm": 9.094492766585798, + "learning_rate": 5e-05, + "loss": 0.0821, + "num_input_tokens_seen": 804114164, + "step": 8293 + }, + { + "epoch": 0.8108134532655455, + "loss": 0.07066532969474792, + "loss_ce": 0.009569143876433372, + "loss_iou": 0.2470703125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 804114164, + "step": 8293 + }, + { + "epoch": 0.8109112240907314, + "grad_norm": 9.50230521719898, + "learning_rate": 5e-05, + "loss": 0.073, + "num_input_tokens_seen": 804212092, + "step": 8294 + }, + { + "epoch": 0.8109112240907314, + "loss": 0.061492279171943665, + "loss_ce": 0.005347563419491053, + "loss_iou": 0.25, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 804212092, + "step": 8294 + }, + { + "epoch": 0.811008994915917, + "grad_norm": 6.608008246663115, + "learning_rate": 5e-05, + "loss": 0.0561, + "num_input_tokens_seen": 804309612, + "step": 8295 + }, + { + "epoch": 0.811008994915917, + "loss": 0.036347899585962296, + "loss_ce": 0.0017104481812566519, + "loss_iou": 0.283203125, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 804309612, + "step": 8295 + }, + { + "epoch": 0.8111067657411029, + "grad_norm": 8.184060043491009, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 804406292, + "step": 8296 + }, + { + "epoch": 0.8111067657411029, + "loss": 0.09947723895311356, + "loss_ce": 0.006444402504712343, + "loss_iou": 0.220703125, + "loss_num": 0.0185546875, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 804406292, + "step": 8296 + }, + { + "epoch": 0.8112045365662887, + "grad_norm": 2.2574457588444314, + "learning_rate": 5e-05, + "loss": 0.0662, + "num_input_tokens_seen": 804503720, + "step": 8297 + }, + { + "epoch": 0.8112045365662887, + "loss": 0.05631604045629501, + "loss_ce": 0.004260681569576263, + "loss_iou": 0.3203125, + "loss_num": 0.01043701171875, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 804503720, + "step": 8297 + }, + { + "epoch": 0.8113023073914744, + "grad_norm": 3.861897191577367, + "learning_rate": 5e-05, + "loss": 0.0911, + "num_input_tokens_seen": 804600848, + "step": 8298 + }, + { + "epoch": 0.8113023073914744, + "loss": 0.09658977389335632, + "loss_ce": 0.005906794220209122, + "loss_iou": 0.263671875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 804600848, + "step": 8298 + }, + { + "epoch": 0.8114000782166602, + "grad_norm": 4.379470036498104, + "learning_rate": 5e-05, + "loss": 0.0566, + "num_input_tokens_seen": 804697004, + "step": 8299 + }, + { + "epoch": 0.8114000782166602, + "loss": 0.04043233022093773, + "loss_ce": 0.0035670935176312923, + "loss_iou": 0.1708984375, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 804697004, + "step": 8299 + }, + { + "epoch": 0.811497849041846, + "grad_norm": 13.402651049467112, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 804794060, + "step": 8300 + }, + { + "epoch": 0.811497849041846, + "loss": 0.08428153395652771, + "loss_ce": 0.005981055554002523, + "loss_iou": 0.255859375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 804794060, + "step": 8300 + }, + { + "epoch": 0.8115956198670317, + "grad_norm": 8.895653820731154, + "learning_rate": 5e-05, + "loss": 0.0742, + "num_input_tokens_seen": 804890672, + "step": 8301 + }, + { + "epoch": 0.8115956198670317, + "loss": 0.07353228330612183, + "loss_ce": 0.006309689953923225, + "loss_iou": 0.298828125, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 804890672, + "step": 8301 + }, + { + "epoch": 0.8116933906922175, + "grad_norm": 7.4208447288973405, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 804987316, + "step": 8302 + }, + { + "epoch": 0.8116933906922175, + "loss": 0.08483918011188507, + "loss_ce": 0.009460760280489922, + "loss_iou": 0.337890625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 804987316, + "step": 8302 + }, + { + "epoch": 0.8117911615174032, + "grad_norm": 6.880779157934383, + "learning_rate": 5e-05, + "loss": 0.0754, + "num_input_tokens_seen": 805084404, + "step": 8303 + }, + { + "epoch": 0.8117911615174032, + "loss": 0.0808180719614029, + "loss_ce": 0.006431473419070244, + "loss_iou": 0.203125, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 805084404, + "step": 8303 + }, + { + "epoch": 0.811888932342589, + "grad_norm": 12.500403806401598, + "learning_rate": 5e-05, + "loss": 0.0656, + "num_input_tokens_seen": 805180884, + "step": 8304 + }, + { + "epoch": 0.811888932342589, + "loss": 0.07108217477798462, + "loss_ce": 0.006873190402984619, + "loss_iou": 0.1923828125, + "loss_num": 0.0128173828125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 805180884, + "step": 8304 + }, + { + "epoch": 0.8119867031677748, + "grad_norm": 10.725670408454773, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 805277380, + "step": 8305 + }, + { + "epoch": 0.8119867031677748, + "loss": 0.11296284943819046, + "loss_ce": 0.0055295308120548725, + "loss_iou": 0.158203125, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 805277380, + "step": 8305 + }, + { + "epoch": 0.8120844739929605, + "grad_norm": 5.338000619617481, + "learning_rate": 5e-05, + "loss": 0.0571, + "num_input_tokens_seen": 805375140, + "step": 8306 + }, + { + "epoch": 0.8120844739929605, + "loss": 0.05642242729663849, + "loss_ce": 0.005198674276471138, + "loss_iou": 0.423828125, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 805375140, + "step": 8306 + }, + { + "epoch": 0.8121822448181463, + "grad_norm": 10.962727742219212, + "learning_rate": 5e-05, + "loss": 0.1031, + "num_input_tokens_seen": 805471456, + "step": 8307 + }, + { + "epoch": 0.8121822448181463, + "loss": 0.07447487115859985, + "loss_ce": 0.007229388691484928, + "loss_iou": 0.228515625, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 805471456, + "step": 8307 + }, + { + "epoch": 0.8122800156433321, + "grad_norm": 18.602685060791327, + "learning_rate": 5e-05, + "loss": 0.0819, + "num_input_tokens_seen": 805568540, + "step": 8308 + }, + { + "epoch": 0.8122800156433321, + "loss": 0.08078500628471375, + "loss_ce": 0.005559179000556469, + "loss_iou": 0.291015625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 805568540, + "step": 8308 + }, + { + "epoch": 0.8123777864685178, + "grad_norm": 12.761867714920697, + "learning_rate": 5e-05, + "loss": 0.0772, + "num_input_tokens_seen": 805665528, + "step": 8309 + }, + { + "epoch": 0.8123777864685178, + "loss": 0.05509987100958824, + "loss_ce": 0.0016941106878221035, + "loss_iou": 0.32421875, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 805665528, + "step": 8309 + }, + { + "epoch": 0.8124755572937036, + "grad_norm": 14.923572346825038, + "learning_rate": 5e-05, + "loss": 0.0727, + "num_input_tokens_seen": 805762920, + "step": 8310 + }, + { + "epoch": 0.8124755572937036, + "loss": 0.10712257027626038, + "loss_ce": 0.005277783609926701, + "loss_iou": 0.21875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 805762920, + "step": 8310 + }, + { + "epoch": 0.8125733281188893, + "grad_norm": 7.00011401943325, + "learning_rate": 5e-05, + "loss": 0.083, + "num_input_tokens_seen": 805859644, + "step": 8311 + }, + { + "epoch": 0.8125733281188893, + "loss": 0.07305584102869034, + "loss_ce": 0.0036436100490391254, + "loss_iou": 0.27734375, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 805859644, + "step": 8311 + }, + { + "epoch": 0.8126710989440751, + "grad_norm": 7.928955143865343, + "learning_rate": 5e-05, + "loss": 0.0632, + "num_input_tokens_seen": 805957224, + "step": 8312 + }, + { + "epoch": 0.8126710989440751, + "loss": 0.06239809840917587, + "loss_ce": 0.006623406428843737, + "loss_iou": 0.2451171875, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 805957224, + "step": 8312 + }, + { + "epoch": 0.8127688697692609, + "grad_norm": 7.012949153528413, + "learning_rate": 5e-05, + "loss": 0.073, + "num_input_tokens_seen": 806053548, + "step": 8313 + }, + { + "epoch": 0.8127688697692609, + "loss": 0.09120656549930573, + "loss_ce": 0.004948629066348076, + "loss_iou": 0.2197265625, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 806053548, + "step": 8313 + }, + { + "epoch": 0.8128666405944466, + "grad_norm": 12.477557372883574, + "learning_rate": 5e-05, + "loss": 0.0662, + "num_input_tokens_seen": 806150248, + "step": 8314 + }, + { + "epoch": 0.8128666405944466, + "loss": 0.049466125667095184, + "loss_ce": 0.003285397542640567, + "loss_iou": 0.2578125, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 806150248, + "step": 8314 + }, + { + "epoch": 0.8129644114196324, + "grad_norm": 20.540955881129324, + "learning_rate": 5e-05, + "loss": 0.1007, + "num_input_tokens_seen": 806248348, + "step": 8315 + }, + { + "epoch": 0.8129644114196324, + "loss": 0.09305129945278168, + "loss_ce": 0.0033296174369752407, + "loss_iou": 0.30859375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 806248348, + "step": 8315 + }, + { + "epoch": 0.8130621822448182, + "grad_norm": 17.05131616430286, + "learning_rate": 5e-05, + "loss": 0.1043, + "num_input_tokens_seen": 806344744, + "step": 8316 + }, + { + "epoch": 0.8130621822448182, + "loss": 0.09961056709289551, + "loss_ce": 0.011170618236064911, + "loss_iou": 0.2314453125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 806344744, + "step": 8316 + }, + { + "epoch": 0.8131599530700039, + "grad_norm": 4.705059825979074, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 806440988, + "step": 8317 + }, + { + "epoch": 0.8131599530700039, + "loss": 0.0998043641448021, + "loss_ce": 0.005810222588479519, + "loss_iou": 0.39453125, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 806440988, + "step": 8317 + }, + { + "epoch": 0.8132577238951897, + "grad_norm": 13.254274966668635, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 806538660, + "step": 8318 + }, + { + "epoch": 0.8132577238951897, + "loss": 0.05143298581242561, + "loss_ce": 0.005175967235118151, + "loss_iou": 0.291015625, + "loss_num": 0.00927734375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 806538660, + "step": 8318 + }, + { + "epoch": 0.8133554947203755, + "grad_norm": 56.33849744011924, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 806635904, + "step": 8319 + }, + { + "epoch": 0.8133554947203755, + "loss": 0.08652673661708832, + "loss_ce": 0.005945076234638691, + "loss_iou": 0.314453125, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 806635904, + "step": 8319 + }, + { + "epoch": 0.8134532655455612, + "grad_norm": 26.064989811770957, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 806732076, + "step": 8320 + }, + { + "epoch": 0.8134532655455612, + "loss": 0.10297427326440811, + "loss_ce": 0.0023997831158339977, + "loss_iou": 0.30078125, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 806732076, + "step": 8320 + }, + { + "epoch": 0.813551036370747, + "grad_norm": 12.123824434924012, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 806829112, + "step": 8321 + }, + { + "epoch": 0.813551036370747, + "loss": 0.050103455781936646, + "loss_ce": 0.0036480743438005447, + "loss_iou": 0.2197265625, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 806829112, + "step": 8321 + }, + { + "epoch": 0.8136488071959327, + "grad_norm": 10.995695704942928, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 806925628, + "step": 8322 + }, + { + "epoch": 0.8136488071959327, + "loss": 0.05454450100660324, + "loss_ce": 0.007215553894639015, + "loss_iou": 0.2080078125, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 806925628, + "step": 8322 + }, + { + "epoch": 0.8137465780211185, + "grad_norm": 9.267544095823867, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 807022384, + "step": 8323 + }, + { + "epoch": 0.8137465780211185, + "loss": 0.07853877544403076, + "loss_ce": 0.004747273866087198, + "loss_iou": 0.248046875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 807022384, + "step": 8323 + }, + { + "epoch": 0.8138443488463043, + "grad_norm": 6.2852618296686344, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 807119240, + "step": 8324 + }, + { + "epoch": 0.8138443488463043, + "loss": 0.054962195456027985, + "loss_ce": 0.0018768669106066227, + "loss_iou": 0.42578125, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 807119240, + "step": 8324 + }, + { + "epoch": 0.81394211967149, + "grad_norm": 8.035236274703133, + "learning_rate": 5e-05, + "loss": 0.0754, + "num_input_tokens_seen": 807215828, + "step": 8325 + }, + { + "epoch": 0.81394211967149, + "loss": 0.04308551177382469, + "loss_ce": 0.0022453623823821545, + "loss_iou": 0.2470703125, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 807215828, + "step": 8325 + }, + { + "epoch": 0.8140398904966758, + "grad_norm": 6.590309530612578, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 807313112, + "step": 8326 + }, + { + "epoch": 0.8140398904966758, + "loss": 0.061045002192258835, + "loss_ce": 0.008539506234228611, + "loss_iou": 0.345703125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 807313112, + "step": 8326 + }, + { + "epoch": 0.8141376613218616, + "grad_norm": 4.628441416888845, + "learning_rate": 5e-05, + "loss": 0.0838, + "num_input_tokens_seen": 807408864, + "step": 8327 + }, + { + "epoch": 0.8141376613218616, + "loss": 0.12601768970489502, + "loss_ce": 0.007487413473427296, + "loss_iou": 0.1796875, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 807408864, + "step": 8327 + }, + { + "epoch": 0.8142354321470473, + "grad_norm": 14.124506364016277, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 807505772, + "step": 8328 + }, + { + "epoch": 0.8142354321470473, + "loss": 0.0775768905878067, + "loss_ce": 0.007630603853613138, + "loss_iou": 0.2265625, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 807505772, + "step": 8328 + }, + { + "epoch": 0.8143332029722331, + "grad_norm": 22.42985621663362, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 807602808, + "step": 8329 + }, + { + "epoch": 0.8143332029722331, + "loss": 0.05421947315335274, + "loss_ce": 0.003216970944777131, + "loss_iou": 0.318359375, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 807602808, + "step": 8329 + }, + { + "epoch": 0.8144309737974188, + "grad_norm": 16.901730826927594, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 807699480, + "step": 8330 + }, + { + "epoch": 0.8144309737974188, + "loss": 0.09267403185367584, + "loss_ce": 0.007560512982308865, + "loss_iou": 0.26171875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 807699480, + "step": 8330 + }, + { + "epoch": 0.8145287446226046, + "grad_norm": 5.637591211663489, + "learning_rate": 5e-05, + "loss": 0.0601, + "num_input_tokens_seen": 807795804, + "step": 8331 + }, + { + "epoch": 0.8145287446226046, + "loss": 0.05883336439728737, + "loss_ce": 0.0026276158168911934, + "loss_iou": 0.25, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 807795804, + "step": 8331 + }, + { + "epoch": 0.8146265154477904, + "grad_norm": 8.468317980787033, + "learning_rate": 5e-05, + "loss": 0.0775, + "num_input_tokens_seen": 807893176, + "step": 8332 + }, + { + "epoch": 0.8146265154477904, + "loss": 0.08336333185434341, + "loss_ce": 0.008572377264499664, + "loss_iou": 0.296875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 807893176, + "step": 8332 + }, + { + "epoch": 0.8147242862729761, + "grad_norm": 9.361158433403594, + "learning_rate": 5e-05, + "loss": 0.1103, + "num_input_tokens_seen": 807990944, + "step": 8333 + }, + { + "epoch": 0.8147242862729761, + "loss": 0.09242807328701019, + "loss_ce": 0.006078582722693682, + "loss_iou": 0.333984375, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 807990944, + "step": 8333 + }, + { + "epoch": 0.8148220570981619, + "grad_norm": 9.69733255834253, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 808088284, + "step": 8334 + }, + { + "epoch": 0.8148220570981619, + "loss": 0.1008276641368866, + "loss_ce": 0.006284207105636597, + "loss_iou": 0.3515625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 808088284, + "step": 8334 + }, + { + "epoch": 0.8149198279233477, + "grad_norm": 22.118058051971996, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 808185072, + "step": 8335 + }, + { + "epoch": 0.8149198279233477, + "loss": 0.12115909904241562, + "loss_ce": 0.008030442520976067, + "loss_iou": 0.25390625, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 808185072, + "step": 8335 + }, + { + "epoch": 0.8150175987485334, + "grad_norm": 6.915161891785861, + "learning_rate": 5e-05, + "loss": 0.0685, + "num_input_tokens_seen": 808282612, + "step": 8336 + }, + { + "epoch": 0.8150175987485334, + "loss": 0.07687418162822723, + "loss_ce": 0.001770424423739314, + "loss_iou": 0.380859375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 808282612, + "step": 8336 + }, + { + "epoch": 0.8151153695737192, + "grad_norm": 2.645692524576176, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 808379092, + "step": 8337 + }, + { + "epoch": 0.8151153695737192, + "loss": 0.06217075139284134, + "loss_ce": 0.004004248417913914, + "loss_iou": 0.2060546875, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 808379092, + "step": 8337 + }, + { + "epoch": 0.8152131403989049, + "grad_norm": 8.950826866349509, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 808476456, + "step": 8338 + }, + { + "epoch": 0.8152131403989049, + "loss": 0.05533473566174507, + "loss_ce": 0.005453753285109997, + "loss_iou": 0.28515625, + "loss_num": 0.010009765625, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 808476456, + "step": 8338 + }, + { + "epoch": 0.8153109112240907, + "grad_norm": 5.228823327805466, + "learning_rate": 5e-05, + "loss": 0.0522, + "num_input_tokens_seen": 808573488, + "step": 8339 + }, + { + "epoch": 0.8153109112240907, + "loss": 0.05168990418314934, + "loss_ce": 0.0019843962509185076, + "loss_iou": 0.275390625, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 808573488, + "step": 8339 + }, + { + "epoch": 0.8154086820492765, + "grad_norm": 3.86346385409607, + "learning_rate": 5e-05, + "loss": 0.0921, + "num_input_tokens_seen": 808670100, + "step": 8340 + }, + { + "epoch": 0.8154086820492765, + "loss": 0.05883108451962471, + "loss_ce": 0.005493986885994673, + "loss_iou": 0.283203125, + "loss_num": 0.01068115234375, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 808670100, + "step": 8340 + }, + { + "epoch": 0.8155064528744622, + "grad_norm": 9.891944052635608, + "learning_rate": 5e-05, + "loss": 0.0497, + "num_input_tokens_seen": 808766876, + "step": 8341 + }, + { + "epoch": 0.8155064528744622, + "loss": 0.05227365344762802, + "loss_ce": 0.006932161748409271, + "loss_iou": 0.232421875, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 808766876, + "step": 8341 + }, + { + "epoch": 0.815604223699648, + "grad_norm": 2.7452665957842863, + "learning_rate": 5e-05, + "loss": 0.0531, + "num_input_tokens_seen": 808864528, + "step": 8342 + }, + { + "epoch": 0.815604223699648, + "loss": 0.056198615580797195, + "loss_ce": 0.004166143015027046, + "loss_iou": 0.30859375, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 808864528, + "step": 8342 + }, + { + "epoch": 0.8157019945248338, + "grad_norm": 2.304127980885625, + "learning_rate": 5e-05, + "loss": 0.107, + "num_input_tokens_seen": 808960440, + "step": 8343 + }, + { + "epoch": 0.8157019945248338, + "loss": 0.11629356443881989, + "loss_ce": 0.005453719757497311, + "loss_iou": 0.2294921875, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 808960440, + "step": 8343 + }, + { + "epoch": 0.8157997653500195, + "grad_norm": 5.759834430953493, + "learning_rate": 5e-05, + "loss": 0.0727, + "num_input_tokens_seen": 809057036, + "step": 8344 + }, + { + "epoch": 0.8157997653500195, + "loss": 0.06838318705558777, + "loss_ce": 0.006974189542233944, + "loss_iou": 0.259765625, + "loss_num": 0.01226806640625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 809057036, + "step": 8344 + }, + { + "epoch": 0.8158975361752053, + "grad_norm": 5.030484501425123, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 809154016, + "step": 8345 + }, + { + "epoch": 0.8158975361752053, + "loss": 0.06261402368545532, + "loss_ce": 0.0049205441027879715, + "loss_iou": 0.287109375, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 809154016, + "step": 8345 + }, + { + "epoch": 0.8159953070003911, + "grad_norm": 2.261936545929807, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 809250492, + "step": 8346 + }, + { + "epoch": 0.8159953070003911, + "loss": 0.048529837280511856, + "loss_ce": 0.00448915921151638, + "loss_iou": 0.2353515625, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 809250492, + "step": 8346 + }, + { + "epoch": 0.8160930778255768, + "grad_norm": 6.398938053692686, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 809346712, + "step": 8347 + }, + { + "epoch": 0.8160930778255768, + "loss": 0.08574442565441132, + "loss_ce": 0.009673637337982655, + "loss_iou": 0.21875, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 809346712, + "step": 8347 + }, + { + "epoch": 0.8161908486507626, + "grad_norm": 3.522705202952259, + "learning_rate": 5e-05, + "loss": 0.1094, + "num_input_tokens_seen": 809445016, + "step": 8348 + }, + { + "epoch": 0.8161908486507626, + "loss": 0.11480788886547089, + "loss_ce": 0.008820340037345886, + "loss_iou": 0.41015625, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 809445016, + "step": 8348 + }, + { + "epoch": 0.8162886194759483, + "grad_norm": 9.8942656576872, + "learning_rate": 5e-05, + "loss": 0.067, + "num_input_tokens_seen": 809542344, + "step": 8349 + }, + { + "epoch": 0.8162886194759483, + "loss": 0.07494841516017914, + "loss_ce": 0.004292594734579325, + "loss_iou": 0.23828125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 809542344, + "step": 8349 + }, + { + "epoch": 0.8163863903011341, + "grad_norm": 13.171265678053544, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 809639672, + "step": 8350 + }, + { + "epoch": 0.8163863903011341, + "loss": 0.10994333773851395, + "loss_ce": 0.006214087828993797, + "loss_iou": 0.291015625, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 809639672, + "step": 8350 + }, + { + "epoch": 0.8164841611263199, + "grad_norm": 2.9891777709097416, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 809735744, + "step": 8351 + }, + { + "epoch": 0.8164841611263199, + "loss": 0.04161328449845314, + "loss_ce": 0.0020167275797575712, + "loss_iou": 0.3203125, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 809735744, + "step": 8351 + }, + { + "epoch": 0.8165819319515056, + "grad_norm": 6.028987581721628, + "learning_rate": 5e-05, + "loss": 0.0988, + "num_input_tokens_seen": 809832740, + "step": 8352 + }, + { + "epoch": 0.8165819319515056, + "loss": 0.11468159407377243, + "loss_ce": 0.007557264529168606, + "loss_iou": 0.298828125, + "loss_num": 0.021484375, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 809832740, + "step": 8352 + }, + { + "epoch": 0.8166797027766914, + "grad_norm": 14.821397604673658, + "learning_rate": 5e-05, + "loss": 0.0658, + "num_input_tokens_seen": 809929228, + "step": 8353 + }, + { + "epoch": 0.8166797027766914, + "loss": 0.08864745497703552, + "loss_ce": 0.0037628132849931717, + "loss_iou": 0.248046875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 809929228, + "step": 8353 + }, + { + "epoch": 0.8167774736018772, + "grad_norm": 9.843953661974654, + "learning_rate": 5e-05, + "loss": 0.0645, + "num_input_tokens_seen": 810025844, + "step": 8354 + }, + { + "epoch": 0.8167774736018772, + "loss": 0.0571042038500309, + "loss_ce": 0.002134413691237569, + "loss_iou": 0.271484375, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 810025844, + "step": 8354 + }, + { + "epoch": 0.8168752444270629, + "grad_norm": 9.746617023010522, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 810123256, + "step": 8355 + }, + { + "epoch": 0.8168752444270629, + "loss": 0.0794321596622467, + "loss_ce": 0.005472810938954353, + "loss_iou": 0.271484375, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 810123256, + "step": 8355 + }, + { + "epoch": 0.8169730152522487, + "grad_norm": 8.737665943946665, + "learning_rate": 5e-05, + "loss": 0.0631, + "num_input_tokens_seen": 810220080, + "step": 8356 + }, + { + "epoch": 0.8169730152522487, + "loss": 0.05538376420736313, + "loss_ce": 0.004060827195644379, + "loss_iou": 0.27734375, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 810220080, + "step": 8356 + }, + { + "epoch": 0.8170707860774344, + "grad_norm": 3.935964493357561, + "learning_rate": 5e-05, + "loss": 0.051, + "num_input_tokens_seen": 810316848, + "step": 8357 + }, + { + "epoch": 0.8170707860774344, + "loss": 0.05429084226489067, + "loss_ce": 0.007537912577390671, + "loss_iou": 0.34765625, + "loss_num": 0.00933837890625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 810316848, + "step": 8357 + }, + { + "epoch": 0.8171685569026202, + "grad_norm": 25.22573541386756, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 810414396, + "step": 8358 + }, + { + "epoch": 0.8171685569026202, + "loss": 0.09278806298971176, + "loss_ce": 0.006858191452920437, + "loss_iou": 0.345703125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 810414396, + "step": 8358 + }, + { + "epoch": 0.817266327727806, + "grad_norm": 4.220073378831266, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 810511604, + "step": 8359 + }, + { + "epoch": 0.817266327727806, + "loss": 0.0567089319229126, + "loss_ce": 0.004329323768615723, + "loss_iou": 0.287109375, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 810511604, + "step": 8359 + }, + { + "epoch": 0.8173640985529917, + "grad_norm": 14.970489561146627, + "learning_rate": 5e-05, + "loss": 0.0763, + "num_input_tokens_seen": 810609136, + "step": 8360 + }, + { + "epoch": 0.8173640985529917, + "loss": 0.06307639926671982, + "loss_ce": 0.009533312171697617, + "loss_iou": 0.259765625, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 810609136, + "step": 8360 + }, + { + "epoch": 0.8174618693781776, + "grad_norm": 36.31382560505549, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 810706520, + "step": 8361 + }, + { + "epoch": 0.8174618693781776, + "loss": 0.08176977932453156, + "loss_ce": 0.004896001424640417, + "loss_iou": 0.384765625, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 810706520, + "step": 8361 + }, + { + "epoch": 0.8175596402033634, + "grad_norm": 13.896557104767018, + "learning_rate": 5e-05, + "loss": 0.1008, + "num_input_tokens_seen": 810803880, + "step": 8362 + }, + { + "epoch": 0.8175596402033634, + "loss": 0.10116364061832428, + "loss_ce": 0.0031411757227033377, + "loss_iou": 0.2734375, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 810803880, + "step": 8362 + }, + { + "epoch": 0.817657411028549, + "grad_norm": 6.458498472117525, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 810900828, + "step": 8363 + }, + { + "epoch": 0.817657411028549, + "loss": 0.0622769258916378, + "loss_ce": 0.003011788008734584, + "loss_iou": 0.302734375, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 810900828, + "step": 8363 + }, + { + "epoch": 0.8177551818537349, + "grad_norm": 7.5241180253323785, + "learning_rate": 5e-05, + "loss": 0.0577, + "num_input_tokens_seen": 810998528, + "step": 8364 + }, + { + "epoch": 0.8177551818537349, + "loss": 0.06380604952573776, + "loss_ce": 0.006234639789909124, + "loss_iou": 0.28125, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 810998528, + "step": 8364 + }, + { + "epoch": 0.8178529526789207, + "grad_norm": 8.073073169538496, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 811094012, + "step": 8365 + }, + { + "epoch": 0.8178529526789207, + "loss": 0.05526421219110489, + "loss_ce": 0.006893852725625038, + "loss_iou": 0.154296875, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 811094012, + "step": 8365 + }, + { + "epoch": 0.8179507235041064, + "grad_norm": 36.20796723014419, + "learning_rate": 5e-05, + "loss": 0.0845, + "num_input_tokens_seen": 811191064, + "step": 8366 + }, + { + "epoch": 0.8179507235041064, + "loss": 0.1161808893084526, + "loss_ce": 0.005066387355327606, + "loss_iou": 0.3125, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 811191064, + "step": 8366 + }, + { + "epoch": 0.8180484943292922, + "grad_norm": 6.925441513594107, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 811287772, + "step": 8367 + }, + { + "epoch": 0.8180484943292922, + "loss": 0.0783342719078064, + "loss_ce": 0.0060285963118076324, + "loss_iou": 0.30859375, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 811287772, + "step": 8367 + }, + { + "epoch": 0.8181462651544779, + "grad_norm": 5.960991384564968, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 811384584, + "step": 8368 + }, + { + "epoch": 0.8181462651544779, + "loss": 0.08021283894777298, + "loss_ce": 0.0016300736460834742, + "loss_iou": 0.296875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 811384584, + "step": 8368 + }, + { + "epoch": 0.8182440359796637, + "grad_norm": 2.583975719463489, + "learning_rate": 5e-05, + "loss": 0.0923, + "num_input_tokens_seen": 811481036, + "step": 8369 + }, + { + "epoch": 0.8182440359796637, + "loss": 0.04179592430591583, + "loss_ce": 0.008108330890536308, + "loss_iou": 0.224609375, + "loss_num": 0.0067138671875, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 811481036, + "step": 8369 + }, + { + "epoch": 0.8183418068048495, + "grad_norm": 13.09368100499215, + "learning_rate": 5e-05, + "loss": 0.1037, + "num_input_tokens_seen": 811578560, + "step": 8370 + }, + { + "epoch": 0.8183418068048495, + "loss": 0.1044565960764885, + "loss_ce": 0.005839049816131592, + "loss_iou": 0.24609375, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 811578560, + "step": 8370 + }, + { + "epoch": 0.8184395776300352, + "grad_norm": 31.950599232824853, + "learning_rate": 5e-05, + "loss": 0.1076, + "num_input_tokens_seen": 811675528, + "step": 8371 + }, + { + "epoch": 0.8184395776300352, + "loss": 0.1242315024137497, + "loss_ce": 0.004724665079265833, + "loss_iou": 0.185546875, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 811675528, + "step": 8371 + }, + { + "epoch": 0.818537348455221, + "grad_norm": 18.31645319624523, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 811771596, + "step": 8372 + }, + { + "epoch": 0.818537348455221, + "loss": 0.08406930416822433, + "loss_ce": 0.006768277380615473, + "loss_iou": 0.21875, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 811771596, + "step": 8372 + }, + { + "epoch": 0.8186351192804068, + "grad_norm": 5.668365744789894, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 811868512, + "step": 8373 + }, + { + "epoch": 0.8186351192804068, + "loss": 0.08647966384887695, + "loss_ce": 0.005211349576711655, + "loss_iou": 0.19921875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 811868512, + "step": 8373 + }, + { + "epoch": 0.8187328901055925, + "grad_norm": 9.841495640415063, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 811966940, + "step": 8374 + }, + { + "epoch": 0.8187328901055925, + "loss": 0.05884960666298866, + "loss_ce": 0.006435663904994726, + "loss_iou": 0.3515625, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 811966940, + "step": 8374 + }, + { + "epoch": 0.8188306609307783, + "grad_norm": 10.109259448357687, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 812064488, + "step": 8375 + }, + { + "epoch": 0.8188306609307783, + "loss": 0.056569136679172516, + "loss_ce": 0.0038500232622027397, + "loss_iou": 0.283203125, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 812064488, + "step": 8375 + }, + { + "epoch": 0.818928431755964, + "grad_norm": 8.269599781247031, + "learning_rate": 5e-05, + "loss": 0.094, + "num_input_tokens_seen": 812161692, + "step": 8376 + }, + { + "epoch": 0.818928431755964, + "loss": 0.10452896356582642, + "loss_ce": 0.008558804169297218, + "loss_iou": 0.28125, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 812161692, + "step": 8376 + }, + { + "epoch": 0.8190262025811498, + "grad_norm": 2.1936993628822976, + "learning_rate": 5e-05, + "loss": 0.075, + "num_input_tokens_seen": 812259028, + "step": 8377 + }, + { + "epoch": 0.8190262025811498, + "loss": 0.1153184324502945, + "loss_ce": 0.004463328514248133, + "loss_iou": 0.259765625, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 812259028, + "step": 8377 + }, + { + "epoch": 0.8191239734063356, + "grad_norm": 27.53375119669187, + "learning_rate": 5e-05, + "loss": 0.103, + "num_input_tokens_seen": 812356784, + "step": 8378 + }, + { + "epoch": 0.8191239734063356, + "loss": 0.0966910570859909, + "loss_ce": 0.00548927579075098, + "loss_iou": 0.298828125, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 812356784, + "step": 8378 + }, + { + "epoch": 0.8192217442315213, + "grad_norm": 17.143241044129503, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 812454116, + "step": 8379 + }, + { + "epoch": 0.8192217442315213, + "loss": 0.0752459317445755, + "loss_ce": 0.0010424440260976553, + "loss_iou": 0.26171875, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 812454116, + "step": 8379 + }, + { + "epoch": 0.8193195150567071, + "grad_norm": 6.016040418149055, + "learning_rate": 5e-05, + "loss": 0.1097, + "num_input_tokens_seen": 812551048, + "step": 8380 + }, + { + "epoch": 0.8193195150567071, + "loss": 0.10062519460916519, + "loss_ce": 0.005150944460183382, + "loss_iou": 0.2138671875, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 812551048, + "step": 8380 + }, + { + "epoch": 0.8194172858818929, + "grad_norm": 2.784731451667499, + "learning_rate": 5e-05, + "loss": 0.0856, + "num_input_tokens_seen": 812648308, + "step": 8381 + }, + { + "epoch": 0.8194172858818929, + "loss": 0.08974204957485199, + "loss_ce": 0.004956582561135292, + "loss_iou": 0.271484375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 812648308, + "step": 8381 + }, + { + "epoch": 0.8195150567070786, + "grad_norm": 12.590244383423437, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 812745376, + "step": 8382 + }, + { + "epoch": 0.8195150567070786, + "loss": 0.06026417016983032, + "loss_ce": 0.006507451180368662, + "loss_iou": 0.3203125, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 812745376, + "step": 8382 + }, + { + "epoch": 0.8196128275322644, + "grad_norm": 4.136261774513868, + "learning_rate": 5e-05, + "loss": 0.0838, + "num_input_tokens_seen": 812842020, + "step": 8383 + }, + { + "epoch": 0.8196128275322644, + "loss": 0.08758947998285294, + "loss_ce": 0.004856324288994074, + "loss_iou": 0.2060546875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 812842020, + "step": 8383 + }, + { + "epoch": 0.8197105983574501, + "grad_norm": 12.085620796906094, + "learning_rate": 5e-05, + "loss": 0.0913, + "num_input_tokens_seen": 812938796, + "step": 8384 + }, + { + "epoch": 0.8197105983574501, + "loss": 0.12946642935276031, + "loss_ce": 0.013339415192604065, + "loss_iou": 0.255859375, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 812938796, + "step": 8384 + }, + { + "epoch": 0.8198083691826359, + "grad_norm": 6.543494733989381, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 813036704, + "step": 8385 + }, + { + "epoch": 0.8198083691826359, + "loss": 0.058222632855176926, + "loss_ce": 0.005503516644239426, + "loss_iou": 0.279296875, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 813036704, + "step": 8385 + }, + { + "epoch": 0.8199061400078217, + "grad_norm": 21.042779656037382, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 813134136, + "step": 8386 + }, + { + "epoch": 0.8199061400078217, + "loss": 0.09232956171035767, + "loss_ce": 0.005827491171658039, + "loss_iou": 0.279296875, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 813134136, + "step": 8386 + }, + { + "epoch": 0.8200039108330074, + "grad_norm": 12.412370509267623, + "learning_rate": 5e-05, + "loss": 0.1072, + "num_input_tokens_seen": 813231728, + "step": 8387 + }, + { + "epoch": 0.8200039108330074, + "loss": 0.05990567058324814, + "loss_ce": 0.005530978552997112, + "loss_iou": 0.3203125, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 813231728, + "step": 8387 + }, + { + "epoch": 0.8201016816581932, + "grad_norm": 11.64906101782533, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 813327904, + "step": 8388 + }, + { + "epoch": 0.8201016816581932, + "loss": 0.11550596356391907, + "loss_ce": 0.004696642979979515, + "loss_iou": 0.271484375, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 813327904, + "step": 8388 + }, + { + "epoch": 0.820199452483379, + "grad_norm": 10.706284839413692, + "learning_rate": 5e-05, + "loss": 0.1009, + "num_input_tokens_seen": 813424880, + "step": 8389 + }, + { + "epoch": 0.820199452483379, + "loss": 0.12851352989673615, + "loss_ce": 0.006580546963959932, + "loss_iou": 0.19140625, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 813424880, + "step": 8389 + }, + { + "epoch": 0.8202972233085647, + "grad_norm": 11.927512767393429, + "learning_rate": 5e-05, + "loss": 0.0714, + "num_input_tokens_seen": 813522704, + "step": 8390 + }, + { + "epoch": 0.8202972233085647, + "loss": 0.07773333787918091, + "loss_ce": 0.007542906329035759, + "loss_iou": 0.265625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 813522704, + "step": 8390 + }, + { + "epoch": 0.8203949941337505, + "grad_norm": 7.66203568108889, + "learning_rate": 5e-05, + "loss": 0.0492, + "num_input_tokens_seen": 813619468, + "step": 8391 + }, + { + "epoch": 0.8203949941337505, + "loss": 0.05052968114614487, + "loss_ce": 0.005813798401504755, + "loss_iou": 0.197265625, + "loss_num": 0.00897216796875, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 813619468, + "step": 8391 + }, + { + "epoch": 0.8204927649589363, + "grad_norm": 9.468868326866446, + "learning_rate": 5e-05, + "loss": 0.0861, + "num_input_tokens_seen": 813716336, + "step": 8392 + }, + { + "epoch": 0.8204927649589363, + "loss": 0.10480442643165588, + "loss_ce": 0.007621193770319223, + "loss_iou": 0.28515625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 813716336, + "step": 8392 + }, + { + "epoch": 0.820590535784122, + "grad_norm": 17.827533178508798, + "learning_rate": 5e-05, + "loss": 0.0708, + "num_input_tokens_seen": 813813784, + "step": 8393 + }, + { + "epoch": 0.820590535784122, + "loss": 0.046720899641513824, + "loss_ce": 0.0025848508812487125, + "loss_iou": 0.291015625, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 813813784, + "step": 8393 + }, + { + "epoch": 0.8206883066093078, + "grad_norm": 5.027930839302532, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 813910596, + "step": 8394 + }, + { + "epoch": 0.8206883066093078, + "loss": 0.0780036598443985, + "loss_ce": 0.004364744294434786, + "loss_iou": 0.19140625, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 813910596, + "step": 8394 + }, + { + "epoch": 0.8207860774344935, + "grad_norm": 4.4653288437063745, + "learning_rate": 5e-05, + "loss": 0.1004, + "num_input_tokens_seen": 814007072, + "step": 8395 + }, + { + "epoch": 0.8207860774344935, + "loss": 0.10588213801383972, + "loss_ce": 0.005540342070162296, + "loss_iou": 0.244140625, + "loss_num": 0.02001953125, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 814007072, + "step": 8395 + }, + { + "epoch": 0.8208838482596793, + "grad_norm": 13.93794881444948, + "learning_rate": 5e-05, + "loss": 0.0975, + "num_input_tokens_seen": 814103544, + "step": 8396 + }, + { + "epoch": 0.8208838482596793, + "loss": 0.07471591979265213, + "loss_ce": 0.006795234978199005, + "loss_iou": 0.29296875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 814103544, + "step": 8396 + }, + { + "epoch": 0.8209816190848651, + "grad_norm": 19.043142534727117, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 814201040, + "step": 8397 + }, + { + "epoch": 0.8209816190848651, + "loss": 0.088653065264225, + "loss_ce": 0.008178218267858028, + "loss_iou": 0.33203125, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 814201040, + "step": 8397 + }, + { + "epoch": 0.8210793899100508, + "grad_norm": 5.0452916700067885, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 814297928, + "step": 8398 + }, + { + "epoch": 0.8210793899100508, + "loss": 0.08771207928657532, + "loss_ce": 0.0034759363625198603, + "loss_iou": 0.359375, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 814297928, + "step": 8398 + }, + { + "epoch": 0.8211771607352366, + "grad_norm": 7.692748928291957, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 814395792, + "step": 8399 + }, + { + "epoch": 0.8211771607352366, + "loss": 0.09209136664867401, + "loss_ce": 0.004933161661028862, + "loss_iou": 0.34765625, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 814395792, + "step": 8399 + }, + { + "epoch": 0.8212749315604224, + "grad_norm": 4.970991578217838, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 814492752, + "step": 8400 + }, + { + "epoch": 0.8212749315604224, + "loss": 0.10051600635051727, + "loss_ce": 0.008154558017849922, + "loss_iou": 0.369140625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 814492752, + "step": 8400 + }, + { + "epoch": 0.8213727023856081, + "grad_norm": 1.8833315940619293, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 814589932, + "step": 8401 + }, + { + "epoch": 0.8213727023856081, + "loss": 0.07262658327817917, + "loss_ce": 0.002909173723310232, + "loss_iou": 0.279296875, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 814589932, + "step": 8401 + }, + { + "epoch": 0.8214704732107939, + "grad_norm": 5.554064750621247, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 814686336, + "step": 8402 + }, + { + "epoch": 0.8214704732107939, + "loss": 0.10677871108055115, + "loss_ce": 0.00628432584926486, + "loss_iou": 0.271484375, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 814686336, + "step": 8402 + }, + { + "epoch": 0.8215682440359796, + "grad_norm": 3.546505908396232, + "learning_rate": 5e-05, + "loss": 0.049, + "num_input_tokens_seen": 814783256, + "step": 8403 + }, + { + "epoch": 0.8215682440359796, + "loss": 0.03834023326635361, + "loss_ce": 0.004198690876364708, + "loss_iou": 0.341796875, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 814783256, + "step": 8403 + }, + { + "epoch": 0.8216660148611654, + "grad_norm": 3.525487815759196, + "learning_rate": 5e-05, + "loss": 0.0503, + "num_input_tokens_seen": 814880480, + "step": 8404 + }, + { + "epoch": 0.8216660148611654, + "loss": 0.042318060994148254, + "loss_ce": 0.004476265981793404, + "loss_iou": 0.359375, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 814880480, + "step": 8404 + }, + { + "epoch": 0.8217637856863512, + "grad_norm": 20.737450655544265, + "learning_rate": 5e-05, + "loss": 0.0773, + "num_input_tokens_seen": 814977756, + "step": 8405 + }, + { + "epoch": 0.8217637856863512, + "loss": 0.078404501080513, + "loss_ce": 0.006596642546355724, + "loss_iou": 0.310546875, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 814977756, + "step": 8405 + }, + { + "epoch": 0.8218615565115369, + "grad_norm": 23.871368430547488, + "learning_rate": 5e-05, + "loss": 0.1091, + "num_input_tokens_seen": 815074396, + "step": 8406 + }, + { + "epoch": 0.8218615565115369, + "loss": 0.09968559443950653, + "loss_ce": 0.005699086003005505, + "loss_iou": 0.310546875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 815074396, + "step": 8406 + }, + { + "epoch": 0.8219593273367227, + "grad_norm": 16.1058050201699, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 815170736, + "step": 8407 + }, + { + "epoch": 0.8219593273367227, + "loss": 0.07884074747562408, + "loss_ce": 0.0009942192118614912, + "loss_iou": 0.21484375, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 815170736, + "step": 8407 + }, + { + "epoch": 0.8220570981619085, + "grad_norm": 11.469256524165816, + "learning_rate": 5e-05, + "loss": 0.1074, + "num_input_tokens_seen": 815267060, + "step": 8408 + }, + { + "epoch": 0.8220570981619085, + "loss": 0.0973125770688057, + "loss_ce": 0.005759840365499258, + "loss_iou": 0.29296875, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 815267060, + "step": 8408 + }, + { + "epoch": 0.8221548689870942, + "grad_norm": 2.987965058471278, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 815363464, + "step": 8409 + }, + { + "epoch": 0.8221548689870942, + "loss": 0.05753982067108154, + "loss_ce": 0.003889917628839612, + "loss_iou": 0.30859375, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 815363464, + "step": 8409 + }, + { + "epoch": 0.82225263981228, + "grad_norm": 7.163810628346196, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 815460220, + "step": 8410 + }, + { + "epoch": 0.82225263981228, + "loss": 0.0739850401878357, + "loss_ce": 0.009417477063834667, + "loss_iou": 0.255859375, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 815460220, + "step": 8410 + }, + { + "epoch": 0.8223504106374658, + "grad_norm": 9.955555887868401, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 815557704, + "step": 8411 + }, + { + "epoch": 0.8223504106374658, + "loss": 0.05921212211251259, + "loss_ce": 0.004906089510768652, + "loss_iou": 0.287109375, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 815557704, + "step": 8411 + }, + { + "epoch": 0.8224481814626515, + "grad_norm": 11.977631609181492, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 815653992, + "step": 8412 + }, + { + "epoch": 0.8224481814626515, + "loss": 0.08012043684720993, + "loss_ce": 0.004192702006548643, + "loss_iou": 0.2734375, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 815653992, + "step": 8412 + }, + { + "epoch": 0.8225459522878373, + "grad_norm": 4.4485882449340135, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 815751108, + "step": 8413 + }, + { + "epoch": 0.8225459522878373, + "loss": 0.07297733426094055, + "loss_ce": 0.004244121722877026, + "loss_iou": 0.263671875, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 815751108, + "step": 8413 + }, + { + "epoch": 0.822643723113023, + "grad_norm": 4.45519494656002, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 815847692, + "step": 8414 + }, + { + "epoch": 0.822643723113023, + "loss": 0.05660928413271904, + "loss_ce": 0.0033866281155496836, + "loss_iou": 0.19140625, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 815847692, + "step": 8414 + }, + { + "epoch": 0.8227414939382088, + "grad_norm": 8.951828256127664, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 815944460, + "step": 8415 + }, + { + "epoch": 0.8227414939382088, + "loss": 0.0588829480111599, + "loss_ce": 0.00450825085863471, + "loss_iou": 0.2412109375, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 815944460, + "step": 8415 + }, + { + "epoch": 0.8228392647633946, + "grad_norm": 6.969918401857369, + "learning_rate": 5e-05, + "loss": 0.0582, + "num_input_tokens_seen": 816041876, + "step": 8416 + }, + { + "epoch": 0.8228392647633946, + "loss": 0.05389651656150818, + "loss_ce": 0.002337065525352955, + "loss_iou": 0.31640625, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 816041876, + "step": 8416 + }, + { + "epoch": 0.8229370355885803, + "grad_norm": 2.826862503806479, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 816139080, + "step": 8417 + }, + { + "epoch": 0.8229370355885803, + "loss": 0.06683429330587387, + "loss_ce": 0.004151185508817434, + "loss_iou": 0.25390625, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 816139080, + "step": 8417 + }, + { + "epoch": 0.8230348064137661, + "grad_norm": 3.991096407392016, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 816234952, + "step": 8418 + }, + { + "epoch": 0.8230348064137661, + "loss": 0.092795729637146, + "loss_ce": 0.0067437863908708096, + "loss_iou": 0.2275390625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 816234952, + "step": 8418 + }, + { + "epoch": 0.823132577238952, + "grad_norm": 7.673181098133067, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 816331784, + "step": 8419 + }, + { + "epoch": 0.823132577238952, + "loss": 0.05583711713552475, + "loss_ce": 0.006093461532145739, + "loss_iou": 0.1630859375, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 816331784, + "step": 8419 + }, + { + "epoch": 0.8232303480641376, + "grad_norm": 19.943723991301596, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 816428928, + "step": 8420 + }, + { + "epoch": 0.8232303480641376, + "loss": 0.047144461423158646, + "loss_ce": 0.0020242207683622837, + "loss_iou": 0.32421875, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 816428928, + "step": 8420 + }, + { + "epoch": 0.8233281188893234, + "grad_norm": 6.007141529339614, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 816525512, + "step": 8421 + }, + { + "epoch": 0.8233281188893234, + "loss": 0.09808039665222168, + "loss_ce": 0.01720118522644043, + "loss_iou": 0.19921875, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 816525512, + "step": 8421 + }, + { + "epoch": 0.8234258897145091, + "grad_norm": 7.262759456925348, + "learning_rate": 5e-05, + "loss": 0.1014, + "num_input_tokens_seen": 816622976, + "step": 8422 + }, + { + "epoch": 0.8234258897145091, + "loss": 0.08825455605983734, + "loss_ce": 0.0041404771618545055, + "loss_iou": 0.400390625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 816622976, + "step": 8422 + }, + { + "epoch": 0.823523660539695, + "grad_norm": 23.891074923106697, + "learning_rate": 5e-05, + "loss": 0.0919, + "num_input_tokens_seen": 816719944, + "step": 8423 + }, + { + "epoch": 0.823523660539695, + "loss": 0.08888693153858185, + "loss_ce": 0.006031710654497147, + "loss_iou": 0.388671875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 816719944, + "step": 8423 + }, + { + "epoch": 0.8236214313648808, + "grad_norm": 18.833235831003, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 816816900, + "step": 8424 + }, + { + "epoch": 0.8236214313648808, + "loss": 0.07202447950839996, + "loss_ce": 0.005419865250587463, + "loss_iou": 0.32421875, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 816816900, + "step": 8424 + }, + { + "epoch": 0.8237192021900664, + "grad_norm": 7.532791531360852, + "learning_rate": 5e-05, + "loss": 0.1292, + "num_input_tokens_seen": 816913500, + "step": 8425 + }, + { + "epoch": 0.8237192021900664, + "loss": 0.1597025990486145, + "loss_ce": 0.008182816207408905, + "loss_iou": 0.294921875, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 816913500, + "step": 8425 + }, + { + "epoch": 0.8238169730152523, + "grad_norm": 3.8534950355701914, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 817010320, + "step": 8426 + }, + { + "epoch": 0.8238169730152523, + "loss": 0.0869281142950058, + "loss_ce": 0.003920304123312235, + "loss_iou": 0.2890625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 817010320, + "step": 8426 + }, + { + "epoch": 0.8239147438404381, + "grad_norm": 4.147034218034352, + "learning_rate": 5e-05, + "loss": 0.0633, + "num_input_tokens_seen": 817107240, + "step": 8427 + }, + { + "epoch": 0.8239147438404381, + "loss": 0.04093752056360245, + "loss_ce": 0.0038891821168363094, + "loss_iou": 0.27734375, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 817107240, + "step": 8427 + }, + { + "epoch": 0.8240125146656238, + "grad_norm": 21.55617877460735, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 817204696, + "step": 8428 + }, + { + "epoch": 0.8240125146656238, + "loss": 0.11661422997713089, + "loss_ce": 0.007483370136469603, + "loss_iou": 0.25390625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 817204696, + "step": 8428 + }, + { + "epoch": 0.8241102854908096, + "grad_norm": 19.97425822790648, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 817301752, + "step": 8429 + }, + { + "epoch": 0.8241102854908096, + "loss": 0.05273909494280815, + "loss_ce": 0.006482076831161976, + "loss_iou": 0.296875, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 817301752, + "step": 8429 + }, + { + "epoch": 0.8242080563159953, + "grad_norm": 6.718538090312378, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 817398744, + "step": 8430 + }, + { + "epoch": 0.8242080563159953, + "loss": 0.08971597254276276, + "loss_ce": 0.010614410042762756, + "loss_iou": 0.2734375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 817398744, + "step": 8430 + }, + { + "epoch": 0.8243058271411811, + "grad_norm": 9.657643142891699, + "learning_rate": 5e-05, + "loss": 0.0718, + "num_input_tokens_seen": 817496400, + "step": 8431 + }, + { + "epoch": 0.8243058271411811, + "loss": 0.05643625557422638, + "loss_ce": 0.006784155499190092, + "loss_iou": 0.32421875, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 817496400, + "step": 8431 + }, + { + "epoch": 0.8244035979663669, + "grad_norm": 14.257964997705576, + "learning_rate": 5e-05, + "loss": 0.0862, + "num_input_tokens_seen": 817592420, + "step": 8432 + }, + { + "epoch": 0.8244035979663669, + "loss": 0.11457079648971558, + "loss_ce": 0.009834472090005875, + "loss_iou": 0.255859375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 817592420, + "step": 8432 + }, + { + "epoch": 0.8245013687915526, + "grad_norm": 18.27968879340801, + "learning_rate": 5e-05, + "loss": 0.0985, + "num_input_tokens_seen": 817688728, + "step": 8433 + }, + { + "epoch": 0.8245013687915526, + "loss": 0.08689334988594055, + "loss_ce": 0.01371219102293253, + "loss_iou": 0.2734375, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 817688728, + "step": 8433 + }, + { + "epoch": 0.8245991396167384, + "grad_norm": 14.148232669719777, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 817786668, + "step": 8434 + }, + { + "epoch": 0.8245991396167384, + "loss": 0.09007062017917633, + "loss_ce": 0.0034769894555211067, + "loss_iou": 0.431640625, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 817786668, + "step": 8434 + }, + { + "epoch": 0.8246969104419242, + "grad_norm": 6.084679229148237, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 817884644, + "step": 8435 + }, + { + "epoch": 0.8246969104419242, + "loss": 0.05233535170555115, + "loss_ce": 0.0034156739711761475, + "loss_iou": 0.33984375, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 817884644, + "step": 8435 + }, + { + "epoch": 0.8247946812671099, + "grad_norm": 3.9879624921958596, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 817980424, + "step": 8436 + }, + { + "epoch": 0.8247946812671099, + "loss": 0.06490520387887955, + "loss_ce": 0.0035572443157434464, + "loss_iou": 0.193359375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 817980424, + "step": 8436 + }, + { + "epoch": 0.8248924520922957, + "grad_norm": 3.6214867834662265, + "learning_rate": 5e-05, + "loss": 0.0961, + "num_input_tokens_seen": 818076100, + "step": 8437 + }, + { + "epoch": 0.8248924520922957, + "loss": 0.10227959603071213, + "loss_ce": 0.005371027626097202, + "loss_iou": 0.2421875, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 818076100, + "step": 8437 + }, + { + "epoch": 0.8249902229174815, + "grad_norm": 6.442350658982947, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 818172456, + "step": 8438 + }, + { + "epoch": 0.8249902229174815, + "loss": 0.03371315822005272, + "loss_ce": 0.002768334001302719, + "loss_iou": 0.27734375, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 818172456, + "step": 8438 + }, + { + "epoch": 0.8250879937426672, + "grad_norm": 5.740021594142296, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 818268604, + "step": 8439 + }, + { + "epoch": 0.8250879937426672, + "loss": 0.04997667670249939, + "loss_ce": 0.00692400150001049, + "loss_iou": 0.173828125, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 818268604, + "step": 8439 + }, + { + "epoch": 0.825185764567853, + "grad_norm": 16.494687813533947, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 818365104, + "step": 8440 + }, + { + "epoch": 0.825185764567853, + "loss": 0.07512910664081573, + "loss_ce": 0.004961559548974037, + "loss_iou": 0.259765625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 818365104, + "step": 8440 + }, + { + "epoch": 0.8252835353930387, + "grad_norm": 16.749488784419167, + "learning_rate": 5e-05, + "loss": 0.0626, + "num_input_tokens_seen": 818462136, + "step": 8441 + }, + { + "epoch": 0.8252835353930387, + "loss": 0.06281962990760803, + "loss_ce": 0.005950120277702808, + "loss_iou": 0.31640625, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 818462136, + "step": 8441 + }, + { + "epoch": 0.8253813062182245, + "grad_norm": 4.062865241116118, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 818558888, + "step": 8442 + }, + { + "epoch": 0.8253813062182245, + "loss": 0.05928313359618187, + "loss_ce": 0.005019063130021095, + "loss_iou": 0.201171875, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 818558888, + "step": 8442 + }, + { + "epoch": 0.8254790770434103, + "grad_norm": 4.222007284036995, + "learning_rate": 5e-05, + "loss": 0.0485, + "num_input_tokens_seen": 818655172, + "step": 8443 + }, + { + "epoch": 0.8254790770434103, + "loss": 0.043421849608421326, + "loss_ce": 0.0037489968817681074, + "loss_iou": 0.2265625, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 818655172, + "step": 8443 + }, + { + "epoch": 0.825576847868596, + "grad_norm": 10.99405896955448, + "learning_rate": 5e-05, + "loss": 0.0578, + "num_input_tokens_seen": 818751552, + "step": 8444 + }, + { + "epoch": 0.825576847868596, + "loss": 0.04388582706451416, + "loss_ce": 0.0024505832698196173, + "loss_iou": 0.19140625, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 818751552, + "step": 8444 + }, + { + "epoch": 0.8256746186937818, + "grad_norm": 6.257672159037122, + "learning_rate": 5e-05, + "loss": 0.0812, + "num_input_tokens_seen": 818848640, + "step": 8445 + }, + { + "epoch": 0.8256746186937818, + "loss": 0.06841865181922913, + "loss_ce": 0.005155718419700861, + "loss_iou": 0.26953125, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 818848640, + "step": 8445 + }, + { + "epoch": 0.8257723895189676, + "grad_norm": 10.16958755900707, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 818945480, + "step": 8446 + }, + { + "epoch": 0.8257723895189676, + "loss": 0.062024377286434174, + "loss_ce": 0.005315086804330349, + "loss_iou": 0.3828125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 818945480, + "step": 8446 + }, + { + "epoch": 0.8258701603441533, + "grad_norm": 6.153385712864302, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 819042080, + "step": 8447 + }, + { + "epoch": 0.8258701603441533, + "loss": 0.05999607592821121, + "loss_ce": 0.0037140315398573875, + "loss_iou": 0.181640625, + "loss_num": 0.01123046875, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 819042080, + "step": 8447 + }, + { + "epoch": 0.8259679311693391, + "grad_norm": 6.050544666970626, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 819139532, + "step": 8448 + }, + { + "epoch": 0.8259679311693391, + "loss": 0.06487204879522324, + "loss_ce": 0.006972576025873423, + "loss_iou": 0.29296875, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 819139532, + "step": 8448 + }, + { + "epoch": 0.8260657019945248, + "grad_norm": 9.769312225038881, + "learning_rate": 5e-05, + "loss": 0.0685, + "num_input_tokens_seen": 819236708, + "step": 8449 + }, + { + "epoch": 0.8260657019945248, + "loss": 0.06584177911281586, + "loss_ce": 0.006897080689668655, + "loss_iou": 0.2138671875, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 819236708, + "step": 8449 + }, + { + "epoch": 0.8261634728197106, + "grad_norm": 6.606616407640309, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 819332804, + "step": 8450 + }, + { + "epoch": 0.8261634728197106, + "loss": 0.0821879506111145, + "loss_ce": 0.006420433986932039, + "loss_iou": 0.26953125, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 819332804, + "step": 8450 + }, + { + "epoch": 0.8262612436448964, + "grad_norm": 5.712113403511343, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 819429092, + "step": 8451 + }, + { + "epoch": 0.8262612436448964, + "loss": 0.09588594734668732, + "loss_ce": 0.005126670002937317, + "loss_iou": 0.251953125, + "loss_num": 0.01806640625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 819429092, + "step": 8451 + }, + { + "epoch": 0.8263590144700821, + "grad_norm": 7.992787924877447, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 819526008, + "step": 8452 + }, + { + "epoch": 0.8263590144700821, + "loss": 0.045250602066516876, + "loss_ce": 0.005409900564700365, + "loss_iou": 0.267578125, + "loss_num": 0.00799560546875, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 819526008, + "step": 8452 + }, + { + "epoch": 0.8264567852952679, + "grad_norm": 8.696722861786492, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 819623184, + "step": 8453 + }, + { + "epoch": 0.8264567852952679, + "loss": 0.12184055149555206, + "loss_ce": 0.007048674393445253, + "loss_iou": 0.296875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 819623184, + "step": 8453 + }, + { + "epoch": 0.8265545561204537, + "grad_norm": 43.87947070852298, + "learning_rate": 5e-05, + "loss": 0.0827, + "num_input_tokens_seen": 819719540, + "step": 8454 + }, + { + "epoch": 0.8265545561204537, + "loss": 0.06772863864898682, + "loss_ce": 0.0016580818919464946, + "loss_iou": 0.275390625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 819719540, + "step": 8454 + }, + { + "epoch": 0.8266523269456394, + "grad_norm": 9.684514631840987, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 819816032, + "step": 8455 + }, + { + "epoch": 0.8266523269456394, + "loss": 0.07452596724033356, + "loss_ce": 0.0026179729029536247, + "loss_iou": 0.251953125, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 819816032, + "step": 8455 + }, + { + "epoch": 0.8267500977708252, + "grad_norm": 18.757824268684935, + "learning_rate": 5e-05, + "loss": 0.0523, + "num_input_tokens_seen": 819913524, + "step": 8456 + }, + { + "epoch": 0.8267500977708252, + "loss": 0.0576162114739418, + "loss_ce": 0.007224057801067829, + "loss_iou": 0.31640625, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 819913524, + "step": 8456 + }, + { + "epoch": 0.826847868596011, + "grad_norm": 4.661418740655239, + "learning_rate": 5e-05, + "loss": 0.1015, + "num_input_tokens_seen": 820010200, + "step": 8457 + }, + { + "epoch": 0.826847868596011, + "loss": 0.08782774955034256, + "loss_ce": 0.0034466502256691456, + "loss_iou": 0.26953125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 820010200, + "step": 8457 + }, + { + "epoch": 0.8269456394211967, + "grad_norm": 3.191608637967162, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 820107348, + "step": 8458 + }, + { + "epoch": 0.8269456394211967, + "loss": 0.06228380277752876, + "loss_ce": 0.005070970393717289, + "loss_iou": 0.267578125, + "loss_num": 0.011474609375, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 820107348, + "step": 8458 + }, + { + "epoch": 0.8270434102463825, + "grad_norm": 6.795129870064381, + "learning_rate": 5e-05, + "loss": 0.063, + "num_input_tokens_seen": 820204328, + "step": 8459 + }, + { + "epoch": 0.8270434102463825, + "loss": 0.06658490002155304, + "loss_ce": 0.005656558088958263, + "loss_iou": 0.3125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 820204328, + "step": 8459 + }, + { + "epoch": 0.8271411810715682, + "grad_norm": 7.756326926803207, + "learning_rate": 5e-05, + "loss": 0.0845, + "num_input_tokens_seen": 820300764, + "step": 8460 + }, + { + "epoch": 0.8271411810715682, + "loss": 0.10785344988107681, + "loss_ce": 0.005436457693576813, + "loss_iou": 0.2578125, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 820300764, + "step": 8460 + }, + { + "epoch": 0.827238951896754, + "grad_norm": 7.188871551616091, + "learning_rate": 5e-05, + "loss": 0.0656, + "num_input_tokens_seen": 820397688, + "step": 8461 + }, + { + "epoch": 0.827238951896754, + "loss": 0.0692984014749527, + "loss_ce": 0.004799502901732922, + "loss_iou": 0.267578125, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 820397688, + "step": 8461 + }, + { + "epoch": 0.8273367227219398, + "grad_norm": 2.8376605216279023, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 820494112, + "step": 8462 + }, + { + "epoch": 0.8273367227219398, + "loss": 0.08586530387401581, + "loss_ce": 0.004337592050433159, + "loss_iou": 0.2431640625, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 820494112, + "step": 8462 + }, + { + "epoch": 0.8274344935471255, + "grad_norm": 5.99440288498265, + "learning_rate": 5e-05, + "loss": 0.124, + "num_input_tokens_seen": 820590924, + "step": 8463 + }, + { + "epoch": 0.8274344935471255, + "loss": 0.11243636906147003, + "loss_ce": 0.006830289028584957, + "loss_iou": 0.28125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 820590924, + "step": 8463 + }, + { + "epoch": 0.8275322643723113, + "grad_norm": 6.448163549757032, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 820687492, + "step": 8464 + }, + { + "epoch": 0.8275322643723113, + "loss": 0.07429397106170654, + "loss_ce": 0.006663199979811907, + "loss_iou": 0.20703125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 820687492, + "step": 8464 + }, + { + "epoch": 0.8276300351974971, + "grad_norm": 10.525121609951288, + "learning_rate": 5e-05, + "loss": 0.0956, + "num_input_tokens_seen": 820784384, + "step": 8465 + }, + { + "epoch": 0.8276300351974971, + "loss": 0.12646131217479706, + "loss_ce": 0.004909798502922058, + "loss_iou": 0.26953125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 820784384, + "step": 8465 + }, + { + "epoch": 0.8277278060226828, + "grad_norm": 7.495238712775207, + "learning_rate": 5e-05, + "loss": 0.0539, + "num_input_tokens_seen": 820880632, + "step": 8466 + }, + { + "epoch": 0.8277278060226828, + "loss": 0.04758422076702118, + "loss_ce": 0.005096124019473791, + "loss_iou": 0.2177734375, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 820880632, + "step": 8466 + }, + { + "epoch": 0.8278255768478686, + "grad_norm": 13.095810818925969, + "learning_rate": 5e-05, + "loss": 0.0646, + "num_input_tokens_seen": 820978096, + "step": 8467 + }, + { + "epoch": 0.8278255768478686, + "loss": 0.05738765746355057, + "loss_ce": 0.005065269768238068, + "loss_iou": 0.27734375, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 820978096, + "step": 8467 + }, + { + "epoch": 0.8279233476730543, + "grad_norm": 10.35035088384925, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 821076376, + "step": 8468 + }, + { + "epoch": 0.8279233476730543, + "loss": 0.09890711307525635, + "loss_ce": 0.005355475470423698, + "loss_iou": 0.34375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 821076376, + "step": 8468 + }, + { + "epoch": 0.8280211184982401, + "grad_norm": 25.177052594337656, + "learning_rate": 5e-05, + "loss": 0.1009, + "num_input_tokens_seen": 821173240, + "step": 8469 + }, + { + "epoch": 0.8280211184982401, + "loss": 0.10547187924385071, + "loss_ce": 0.008624346926808357, + "loss_iou": 0.189453125, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 821173240, + "step": 8469 + }, + { + "epoch": 0.8281188893234259, + "grad_norm": 46.71444733097435, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 821270656, + "step": 8470 + }, + { + "epoch": 0.8281188893234259, + "loss": 0.07288943231105804, + "loss_ce": 0.005384547635912895, + "loss_iou": 0.2138671875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 821270656, + "step": 8470 + }, + { + "epoch": 0.8282166601486116, + "grad_norm": 19.623457029458493, + "learning_rate": 5e-05, + "loss": 0.0613, + "num_input_tokens_seen": 821367796, + "step": 8471 + }, + { + "epoch": 0.8282166601486116, + "loss": 0.06663508713245392, + "loss_ce": 0.00491328164935112, + "loss_iou": 0.33203125, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 821367796, + "step": 8471 + }, + { + "epoch": 0.8283144309737974, + "grad_norm": 9.277339088346306, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 821464460, + "step": 8472 + }, + { + "epoch": 0.8283144309737974, + "loss": 0.0655599981546402, + "loss_ce": 0.0014730853727087379, + "loss_iou": 0.294921875, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 821464460, + "step": 8472 + }, + { + "epoch": 0.8284122017989832, + "grad_norm": 6.9789311923557165, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 821562252, + "step": 8473 + }, + { + "epoch": 0.8284122017989832, + "loss": 0.073758564889431, + "loss_ce": 0.004865127615630627, + "loss_iou": 0.3046875, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 821562252, + "step": 8473 + }, + { + "epoch": 0.8285099726241689, + "grad_norm": 8.23612483640482, + "learning_rate": 5e-05, + "loss": 0.0866, + "num_input_tokens_seen": 821659992, + "step": 8474 + }, + { + "epoch": 0.8285099726241689, + "loss": 0.08399452269077301, + "loss_ce": 0.009684216231107712, + "loss_iou": 0.310546875, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 821659992, + "step": 8474 + }, + { + "epoch": 0.8286077434493547, + "grad_norm": 9.76366096213694, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 821756984, + "step": 8475 + }, + { + "epoch": 0.8286077434493547, + "loss": 0.062086399644613266, + "loss_ce": 0.004011447541415691, + "loss_iou": 0.27734375, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 821756984, + "step": 8475 + }, + { + "epoch": 0.8287055142745404, + "grad_norm": 15.046959158342558, + "learning_rate": 5e-05, + "loss": 0.0638, + "num_input_tokens_seen": 821853964, + "step": 8476 + }, + { + "epoch": 0.8287055142745404, + "loss": 0.07559747248888016, + "loss_ce": 0.005204861983656883, + "loss_iou": 0.2294921875, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 821853964, + "step": 8476 + }, + { + "epoch": 0.8288032850997262, + "grad_norm": 91.1777795039594, + "learning_rate": 5e-05, + "loss": 0.094, + "num_input_tokens_seen": 821950356, + "step": 8477 + }, + { + "epoch": 0.8288032850997262, + "loss": 0.08466802537441254, + "loss_ce": 0.007199150510132313, + "loss_iou": 0.2158203125, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 821950356, + "step": 8477 + }, + { + "epoch": 0.828901055924912, + "grad_norm": 9.711274311288639, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 822047416, + "step": 8478 + }, + { + "epoch": 0.828901055924912, + "loss": 0.0941617488861084, + "loss_ce": 0.004806274548172951, + "loss_iou": 0.3125, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 822047416, + "step": 8478 + }, + { + "epoch": 0.8289988267500977, + "grad_norm": 51.18119209482963, + "learning_rate": 5e-05, + "loss": 0.0875, + "num_input_tokens_seen": 822144788, + "step": 8479 + }, + { + "epoch": 0.8289988267500977, + "loss": 0.07663016021251678, + "loss_ce": 0.0046849725767970085, + "loss_iou": 0.26171875, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 822144788, + "step": 8479 + }, + { + "epoch": 0.8290965975752835, + "grad_norm": 16.665969627954617, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 822241488, + "step": 8480 + }, + { + "epoch": 0.8290965975752835, + "loss": 0.08878776431083679, + "loss_ce": 0.005154344718903303, + "loss_iou": 0.259765625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 822241488, + "step": 8480 + }, + { + "epoch": 0.8291943684004693, + "grad_norm": 7.995345490570566, + "learning_rate": 5e-05, + "loss": 0.0705, + "num_input_tokens_seen": 822339216, + "step": 8481 + }, + { + "epoch": 0.8291943684004693, + "loss": 0.07717764377593994, + "loss_ce": 0.0023142052814364433, + "loss_iou": 0.251953125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 822339216, + "step": 8481 + }, + { + "epoch": 0.829292139225655, + "grad_norm": 5.88448804052416, + "learning_rate": 5e-05, + "loss": 0.0848, + "num_input_tokens_seen": 822435816, + "step": 8482 + }, + { + "epoch": 0.829292139225655, + "loss": 0.10235626250505447, + "loss_ce": 0.006332707591354847, + "loss_iou": 0.298828125, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 822435816, + "step": 8482 + }, + { + "epoch": 0.8293899100508408, + "grad_norm": 5.194163114174809, + "learning_rate": 5e-05, + "loss": 0.0964, + "num_input_tokens_seen": 822532304, + "step": 8483 + }, + { + "epoch": 0.8293899100508408, + "loss": 0.09795030951499939, + "loss_ce": 0.007053699344396591, + "loss_iou": 0.28515625, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 822532304, + "step": 8483 + }, + { + "epoch": 0.8294876808760266, + "grad_norm": 15.114646199778575, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 822628684, + "step": 8484 + }, + { + "epoch": 0.8294876808760266, + "loss": 0.09840045124292374, + "loss_ce": 0.005764340050518513, + "loss_iou": 0.33203125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 822628684, + "step": 8484 + }, + { + "epoch": 0.8295854517012123, + "grad_norm": 46.404767551340036, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 822725896, + "step": 8485 + }, + { + "epoch": 0.8295854517012123, + "loss": 0.0669693574309349, + "loss_ce": 0.0036148610524833202, + "loss_iou": 0.400390625, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 822725896, + "step": 8485 + }, + { + "epoch": 0.8296832225263981, + "grad_norm": 5.503498389786947, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 822823292, + "step": 8486 + }, + { + "epoch": 0.8296832225263981, + "loss": 0.07939158380031586, + "loss_ce": 0.0033417842350900173, + "loss_iou": 0.365234375, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 822823292, + "step": 8486 + }, + { + "epoch": 0.8297809933515838, + "grad_norm": 3.041997774288281, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 822919204, + "step": 8487 + }, + { + "epoch": 0.8297809933515838, + "loss": 0.0840664729475975, + "loss_ce": 0.006780704483389854, + "loss_iou": 0.330078125, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 822919204, + "step": 8487 + }, + { + "epoch": 0.8298787641767696, + "grad_norm": 4.681932952037273, + "learning_rate": 5e-05, + "loss": 0.0705, + "num_input_tokens_seen": 823016168, + "step": 8488 + }, + { + "epoch": 0.8298787641767696, + "loss": 0.08999744057655334, + "loss_ce": 0.012093689292669296, + "loss_iou": 0.244140625, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 823016168, + "step": 8488 + }, + { + "epoch": 0.8299765350019555, + "grad_norm": 12.029009829136141, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 823114272, + "step": 8489 + }, + { + "epoch": 0.8299765350019555, + "loss": 0.09761994332075119, + "loss_ce": 0.005517890676856041, + "loss_iou": 0.34375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 823114272, + "step": 8489 + }, + { + "epoch": 0.8300743058271411, + "grad_norm": 12.674948647994292, + "learning_rate": 5e-05, + "loss": 0.0937, + "num_input_tokens_seen": 823210732, + "step": 8490 + }, + { + "epoch": 0.8300743058271411, + "loss": 0.06960517168045044, + "loss_ce": 0.0061133503913879395, + "loss_iou": 0.279296875, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 823210732, + "step": 8490 + }, + { + "epoch": 0.830172076652327, + "grad_norm": 9.89278835576078, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 823308304, + "step": 8491 + }, + { + "epoch": 0.830172076652327, + "loss": 0.0877930223941803, + "loss_ce": 0.0024201024789363146, + "loss_iou": 0.283203125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 823308304, + "step": 8491 + }, + { + "epoch": 0.8302698474775128, + "grad_norm": 11.16950720921915, + "learning_rate": 5e-05, + "loss": 0.0827, + "num_input_tokens_seen": 823404956, + "step": 8492 + }, + { + "epoch": 0.8302698474775128, + "loss": 0.06007865071296692, + "loss_ce": 0.006920842919498682, + "loss_iou": 0.1904296875, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 823404956, + "step": 8492 + }, + { + "epoch": 0.8303676183026985, + "grad_norm": 16.074894799266847, + "learning_rate": 5e-05, + "loss": 0.1024, + "num_input_tokens_seen": 823502620, + "step": 8493 + }, + { + "epoch": 0.8303676183026985, + "loss": 0.0899195447564125, + "loss_ce": 0.0060877553187310696, + "loss_iou": 0.31640625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 823502620, + "step": 8493 + }, + { + "epoch": 0.8304653891278843, + "grad_norm": 19.417962772385028, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 823600108, + "step": 8494 + }, + { + "epoch": 0.8304653891278843, + "loss": 0.06834110617637634, + "loss_ce": 0.005322309210896492, + "loss_iou": 0.33984375, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 823600108, + "step": 8494 + }, + { + "epoch": 0.83056315995307, + "grad_norm": 4.184344674807653, + "learning_rate": 5e-05, + "loss": 0.0454, + "num_input_tokens_seen": 823696808, + "step": 8495 + }, + { + "epoch": 0.83056315995307, + "loss": 0.03162422776222229, + "loss_ce": 0.005142598878592253, + "loss_iou": 0.30078125, + "loss_num": 0.00531005859375, + "loss_xval": 0.0264892578125, + "num_input_tokens_seen": 823696808, + "step": 8495 + }, + { + "epoch": 0.8306609307782558, + "grad_norm": 4.519104595345124, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 823793952, + "step": 8496 + }, + { + "epoch": 0.8306609307782558, + "loss": 0.06286118924617767, + "loss_ce": 0.004275070503354073, + "loss_iou": 0.2216796875, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 823793952, + "step": 8496 + }, + { + "epoch": 0.8307587016034416, + "grad_norm": 9.64470890126337, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 823890072, + "step": 8497 + }, + { + "epoch": 0.8307587016034416, + "loss": 0.06956089287996292, + "loss_ce": 0.003749737050384283, + "loss_iou": 0.251953125, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 823890072, + "step": 8497 + }, + { + "epoch": 0.8308564724286273, + "grad_norm": 81.14622599795683, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 823986668, + "step": 8498 + }, + { + "epoch": 0.8308564724286273, + "loss": 0.09686532616615295, + "loss_ce": 0.005983980372548103, + "loss_iou": 0.205078125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 823986668, + "step": 8498 + }, + { + "epoch": 0.8309542432538131, + "grad_norm": 9.519030659289854, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 824083708, + "step": 8499 + }, + { + "epoch": 0.8309542432538131, + "loss": 0.092827707529068, + "loss_ce": 0.006768145598471165, + "loss_iou": 0.32421875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 824083708, + "step": 8499 + }, + { + "epoch": 0.8310520140789989, + "grad_norm": 4.949294441565489, + "learning_rate": 5e-05, + "loss": 0.0587, + "num_input_tokens_seen": 824179572, + "step": 8500 + }, + { + "epoch": 0.8310520140789989, + "eval_seeclick_CIoU": 0.5596144944429398, + "eval_seeclick_GIoU": 0.5689268857240677, + "eval_seeclick_IoU": 0.5988489836454391, + "eval_seeclick_MAE_all": 0.06893528439104557, + "eval_seeclick_MAE_h": 0.03312600031495094, + "eval_seeclick_MAE_w": 0.09829626977443695, + "eval_seeclick_MAE_x": 0.11145920678973198, + "eval_seeclick_MAE_y": 0.03285967092961073, + "eval_seeclick_NUM_probability": 0.9999983906745911, + "eval_seeclick_inside_bbox": 0.8309659063816071, + "eval_seeclick_loss": 0.22831134498119354, + "eval_seeclick_loss_ce": 0.010440738871693611, + "eval_seeclick_loss_iou": 0.363525390625, + "eval_seeclick_loss_num": 0.044475555419921875, + "eval_seeclick_loss_xval": 0.2222137451171875, + "eval_seeclick_runtime": 76.3521, + "eval_seeclick_samples_per_second": 0.563, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 824179572, + "step": 8500 + }, + { + "epoch": 0.8310520140789989, + "eval_icons_CIoU": 0.7226332128047943, + "eval_icons_GIoU": 0.7199908494949341, + "eval_icons_IoU": 0.7486093938350677, + "eval_icons_MAE_all": 0.0545694287866354, + "eval_icons_MAE_h": 0.0723088551312685, + "eval_icons_MAE_w": 0.03769731801003218, + "eval_icons_MAE_x": 0.038409510627388954, + "eval_icons_MAE_y": 0.06986202672123909, + "eval_icons_NUM_probability": 0.9999979436397552, + "eval_icons_inside_bbox": 0.8819444477558136, + "eval_icons_loss": 0.15340299904346466, + "eval_icons_loss_ce": 4.378510311653372e-06, + "eval_icons_loss_iou": 0.35540771484375, + "eval_icons_loss_num": 0.031726837158203125, + "eval_icons_loss_xval": 0.1585540771484375, + "eval_icons_runtime": 85.7971, + "eval_icons_samples_per_second": 0.583, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 824179572, + "step": 8500 + }, + { + "epoch": 0.8310520140789989, + "eval_screenspot_CIoU": 0.2696854844689369, + "eval_screenspot_GIoU": 0.243804598848025, + "eval_screenspot_IoU": 0.3730196605126063, + "eval_screenspot_MAE_all": 0.18071540941794714, + "eval_screenspot_MAE_h": 0.13394198815027872, + "eval_screenspot_MAE_w": 0.24810083707173666, + "eval_screenspot_MAE_x": 0.21174535155296326, + "eval_screenspot_MAE_y": 0.12907343854506811, + "eval_screenspot_NUM_probability": 0.9999977350234985, + "eval_screenspot_inside_bbox": 0.5958333412806193, + "eval_screenspot_loss": 0.6295044422149658, + "eval_screenspot_loss_ce": 0.024287888159354527, + "eval_screenspot_loss_iou": 0.3123779296875, + "eval_screenspot_loss_num": 0.12146504720052083, + "eval_screenspot_loss_xval": 0.607421875, + "eval_screenspot_runtime": 146.5939, + "eval_screenspot_samples_per_second": 0.607, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 824179572, + "step": 8500 + }, + { + "epoch": 0.8310520140789989, + "eval_compot_CIoU": 0.476303294301033, + "eval_compot_GIoU": 0.46657124161720276, + "eval_compot_IoU": 0.5377232134342194, + "eval_compot_MAE_all": 0.08826178312301636, + "eval_compot_MAE_h": 0.06163753941655159, + "eval_compot_MAE_w": 0.11432212963700294, + "eval_compot_MAE_x": 0.11155791208148003, + "eval_compot_MAE_y": 0.06552954763174057, + "eval_compot_NUM_probability": 0.9999933540821075, + "eval_compot_inside_bbox": 0.7552083432674408, + "eval_compot_loss": 0.2848620116710663, + "eval_compot_loss_ce": 0.02143199648708105, + "eval_compot_loss_iou": 0.4400634765625, + "eval_compot_loss_num": 0.046718597412109375, + "eval_compot_loss_xval": 0.233612060546875, + "eval_compot_runtime": 100.642, + "eval_compot_samples_per_second": 0.497, + "eval_compot_steps_per_second": 0.02, + "num_input_tokens_seen": 824179572, + "step": 8500 + }, + { + "epoch": 0.8310520140789989, + "loss": 0.2326662689447403, + "loss_ce": 0.021454118192195892, + "loss_iou": 0.451171875, + "loss_num": 0.042236328125, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 824179572, + "step": 8500 + }, + { + "epoch": 0.8311497849041846, + "grad_norm": 5.745529462332251, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 824275576, + "step": 8501 + }, + { + "epoch": 0.8311497849041846, + "loss": 0.059797778725624084, + "loss_ce": 0.010796086862683296, + "loss_iou": 0.1298828125, + "loss_num": 0.009765625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 824275576, + "step": 8501 + }, + { + "epoch": 0.8312475557293704, + "grad_norm": 3.166191311694031, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 824372644, + "step": 8502 + }, + { + "epoch": 0.8312475557293704, + "loss": 0.04216572269797325, + "loss_ce": 0.0048846835270524025, + "loss_iou": 0.263671875, + "loss_num": 0.007476806640625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 824372644, + "step": 8502 + }, + { + "epoch": 0.8313453265545562, + "grad_norm": 8.254774138550363, + "learning_rate": 5e-05, + "loss": 0.0787, + "num_input_tokens_seen": 824469776, + "step": 8503 + }, + { + "epoch": 0.8313453265545562, + "loss": 0.06734927743673325, + "loss_ce": 0.0068176607601344585, + "loss_iou": 0.265625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 824469776, + "step": 8503 + }, + { + "epoch": 0.8314430973797419, + "grad_norm": 8.402931039795087, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 824566552, + "step": 8504 + }, + { + "epoch": 0.8314430973797419, + "loss": 0.053420137614011765, + "loss_ce": 0.00559909176081419, + "loss_iou": 0.265625, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 824566552, + "step": 8504 + }, + { + "epoch": 0.8315408682049277, + "grad_norm": 2.3844007261781806, + "learning_rate": 5e-05, + "loss": 0.0597, + "num_input_tokens_seen": 824663932, + "step": 8505 + }, + { + "epoch": 0.8315408682049277, + "loss": 0.06736084818840027, + "loss_ce": 0.005513157695531845, + "loss_iou": 0.310546875, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 824663932, + "step": 8505 + }, + { + "epoch": 0.8316386390301134, + "grad_norm": 2.913180250175298, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 824761196, + "step": 8506 + }, + { + "epoch": 0.8316386390301134, + "loss": 0.09689769148826599, + "loss_ce": 0.00800761766731739, + "loss_iou": 0.267578125, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 824761196, + "step": 8506 + }, + { + "epoch": 0.8317364098552992, + "grad_norm": 11.489758533605205, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 824857824, + "step": 8507 + }, + { + "epoch": 0.8317364098552992, + "loss": 0.08992244303226471, + "loss_ce": 0.00808955729007721, + "loss_iou": 0.37109375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 824857824, + "step": 8507 + }, + { + "epoch": 0.831834180680485, + "grad_norm": 3.9520156539889797, + "learning_rate": 5e-05, + "loss": 0.0993, + "num_input_tokens_seen": 824954728, + "step": 8508 + }, + { + "epoch": 0.831834180680485, + "loss": 0.14023102819919586, + "loss_ce": 0.0048855626955628395, + "loss_iou": 0.2109375, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 824954728, + "step": 8508 + }, + { + "epoch": 0.8319319515056707, + "grad_norm": 18.884275546548544, + "learning_rate": 5e-05, + "loss": 0.0867, + "num_input_tokens_seen": 825051488, + "step": 8509 + }, + { + "epoch": 0.8319319515056707, + "loss": 0.082803875207901, + "loss_ce": 0.005685952492058277, + "loss_iou": 0.240234375, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 825051488, + "step": 8509 + }, + { + "epoch": 0.8320297223308565, + "grad_norm": 12.20382183421487, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 825148264, + "step": 8510 + }, + { + "epoch": 0.8320297223308565, + "loss": 0.06039946526288986, + "loss_ce": 0.003812245326116681, + "loss_iou": 0.3046875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 825148264, + "step": 8510 + }, + { + "epoch": 0.8321274931560423, + "grad_norm": 5.023282342407606, + "learning_rate": 5e-05, + "loss": 0.0935, + "num_input_tokens_seen": 825245032, + "step": 8511 + }, + { + "epoch": 0.8321274931560423, + "loss": 0.09364007413387299, + "loss_ce": 0.004040466155856848, + "loss_iou": 0.2578125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 825245032, + "step": 8511 + }, + { + "epoch": 0.832225263981228, + "grad_norm": 15.292826856634477, + "learning_rate": 5e-05, + "loss": 0.067, + "num_input_tokens_seen": 825341452, + "step": 8512 + }, + { + "epoch": 0.832225263981228, + "loss": 0.05472918972373009, + "loss_ce": 0.004955021198838949, + "loss_iou": 0.2333984375, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 825341452, + "step": 8512 + }, + { + "epoch": 0.8323230348064138, + "grad_norm": 4.616663265697347, + "learning_rate": 5e-05, + "loss": 0.0775, + "num_input_tokens_seen": 825438148, + "step": 8513 + }, + { + "epoch": 0.8323230348064138, + "loss": 0.0549214631319046, + "loss_ce": 0.003537967335432768, + "loss_iou": 0.216796875, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 825438148, + "step": 8513 + }, + { + "epoch": 0.8324208056315995, + "grad_norm": 6.399391060991077, + "learning_rate": 5e-05, + "loss": 0.0589, + "num_input_tokens_seen": 825535832, + "step": 8514 + }, + { + "epoch": 0.8324208056315995, + "loss": 0.06543275713920593, + "loss_ce": 0.0037986969109624624, + "loss_iou": 0.3046875, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 825535832, + "step": 8514 + }, + { + "epoch": 0.8325185764567853, + "grad_norm": 8.83933653956399, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 825632992, + "step": 8515 + }, + { + "epoch": 0.8325185764567853, + "loss": 0.0660448968410492, + "loss_ce": 0.003514384850859642, + "loss_iou": 0.287109375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 825632992, + "step": 8515 + }, + { + "epoch": 0.8326163472819711, + "grad_norm": 6.087488359719928, + "learning_rate": 5e-05, + "loss": 0.0564, + "num_input_tokens_seen": 825729968, + "step": 8516 + }, + { + "epoch": 0.8326163472819711, + "loss": 0.07552216947078705, + "loss_ce": 0.004500131122767925, + "loss_iou": 0.302734375, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 825729968, + "step": 8516 + }, + { + "epoch": 0.8327141181071568, + "grad_norm": 12.076988199400681, + "learning_rate": 5e-05, + "loss": 0.0991, + "num_input_tokens_seen": 825827580, + "step": 8517 + }, + { + "epoch": 0.8327141181071568, + "loss": 0.1243591383099556, + "loss_ce": 0.010391242802143097, + "loss_iou": 0.279296875, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 825827580, + "step": 8517 + }, + { + "epoch": 0.8328118889323426, + "grad_norm": 11.7459173358579, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 825923976, + "step": 8518 + }, + { + "epoch": 0.8328118889323426, + "loss": 0.026109933853149414, + "loss_ce": 0.0028021347243338823, + "loss_iou": 0.271484375, + "loss_num": 0.004669189453125, + "loss_xval": 0.0233154296875, + "num_input_tokens_seen": 825923976, + "step": 8518 + }, + { + "epoch": 0.8329096597575284, + "grad_norm": 17.33574948827483, + "learning_rate": 5e-05, + "loss": 0.0775, + "num_input_tokens_seen": 826020688, + "step": 8519 + }, + { + "epoch": 0.8329096597575284, + "loss": 0.10686216503381729, + "loss_ce": 0.006993385963141918, + "loss_iou": 0.2333984375, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 826020688, + "step": 8519 + }, + { + "epoch": 0.8330074305827141, + "grad_norm": 6.744775074747523, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 826117444, + "step": 8520 + }, + { + "epoch": 0.8330074305827141, + "loss": 0.11335289478302002, + "loss_ce": 0.00838768295943737, + "loss_iou": 0.2890625, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 826117444, + "step": 8520 + }, + { + "epoch": 0.8331052014078999, + "grad_norm": 8.141118979834513, + "learning_rate": 5e-05, + "loss": 0.051, + "num_input_tokens_seen": 826213180, + "step": 8521 + }, + { + "epoch": 0.8331052014078999, + "loss": 0.05968267843127251, + "loss_ce": 0.007360293995589018, + "loss_iou": 0.2138671875, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 826213180, + "step": 8521 + }, + { + "epoch": 0.8332029722330856, + "grad_norm": 5.376534120771657, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 826309964, + "step": 8522 + }, + { + "epoch": 0.8332029722330856, + "loss": 0.06006230041384697, + "loss_ce": 0.00445927307009697, + "loss_iou": 0.373046875, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 826309964, + "step": 8522 + }, + { + "epoch": 0.8333007430582714, + "grad_norm": 4.223151566025789, + "learning_rate": 5e-05, + "loss": 0.0598, + "num_input_tokens_seen": 826406776, + "step": 8523 + }, + { + "epoch": 0.8333007430582714, + "loss": 0.0735371857881546, + "loss_ce": 0.0025113378651440144, + "loss_iou": 0.328125, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 826406776, + "step": 8523 + }, + { + "epoch": 0.8333985138834572, + "grad_norm": 7.466550362966818, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 826503692, + "step": 8524 + }, + { + "epoch": 0.8333985138834572, + "loss": 0.0782555639743805, + "loss_ce": 0.0059288982301950455, + "loss_iou": 0.25390625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 826503692, + "step": 8524 + }, + { + "epoch": 0.8334962847086429, + "grad_norm": 4.209337886358743, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 826600180, + "step": 8525 + }, + { + "epoch": 0.8334962847086429, + "loss": 0.05521003156900406, + "loss_ce": 0.007221140433102846, + "loss_iou": 0.279296875, + "loss_num": 0.00958251953125, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 826600180, + "step": 8525 + }, + { + "epoch": 0.8335940555338287, + "grad_norm": 18.262449284564976, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 826697504, + "step": 8526 + }, + { + "epoch": 0.8335940555338287, + "loss": 0.11218582093715668, + "loss_ce": 0.005153045989573002, + "loss_iou": 0.2490234375, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 826697504, + "step": 8526 + }, + { + "epoch": 0.8336918263590145, + "grad_norm": 7.672719050807673, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 826794056, + "step": 8527 + }, + { + "epoch": 0.8336918263590145, + "loss": 0.06535300612449646, + "loss_ce": 0.004424666054546833, + "loss_iou": 0.2314453125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 826794056, + "step": 8527 + }, + { + "epoch": 0.8337895971842002, + "grad_norm": 28.757834492869755, + "learning_rate": 5e-05, + "loss": 0.0923, + "num_input_tokens_seen": 826890900, + "step": 8528 + }, + { + "epoch": 0.8337895971842002, + "loss": 0.11004026234149933, + "loss_ce": 0.0045638857409358025, + "loss_iou": 0.1884765625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 826890900, + "step": 8528 + }, + { + "epoch": 0.833887368009386, + "grad_norm": 11.41533341303275, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 826987600, + "step": 8529 + }, + { + "epoch": 0.833887368009386, + "loss": 0.08793550729751587, + "loss_ce": 0.005873736925423145, + "loss_iou": 0.251953125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 826987600, + "step": 8529 + }, + { + "epoch": 0.8339851388345718, + "grad_norm": 7.186789742829601, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 827083628, + "step": 8530 + }, + { + "epoch": 0.8339851388345718, + "loss": 0.08388568460941315, + "loss_ce": 0.002647886984050274, + "loss_iou": 0.21875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 827083628, + "step": 8530 + }, + { + "epoch": 0.8340829096597575, + "grad_norm": 5.0647520032094615, + "learning_rate": 5e-05, + "loss": 0.0677, + "num_input_tokens_seen": 827181600, + "step": 8531 + }, + { + "epoch": 0.8340829096597575, + "loss": 0.0683598667383194, + "loss_ce": 0.0023732346016913652, + "loss_iou": 0.185546875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 827181600, + "step": 8531 + }, + { + "epoch": 0.8341806804849433, + "grad_norm": 2.0262063689449326, + "learning_rate": 5e-05, + "loss": 0.0536, + "num_input_tokens_seen": 827278704, + "step": 8532 + }, + { + "epoch": 0.8341806804849433, + "loss": 0.030471011996269226, + "loss_ce": 0.004775212146341801, + "loss_iou": 0.220703125, + "loss_num": 0.005126953125, + "loss_xval": 0.025634765625, + "num_input_tokens_seen": 827278704, + "step": 8532 + }, + { + "epoch": 0.834278451310129, + "grad_norm": 4.09976861073393, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 827375488, + "step": 8533 + }, + { + "epoch": 0.834278451310129, + "loss": 0.09750965237617493, + "loss_ce": 0.010305673815310001, + "loss_iou": 0.2255859375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 827375488, + "step": 8533 + }, + { + "epoch": 0.8343762221353148, + "grad_norm": 2.956241150003937, + "learning_rate": 5e-05, + "loss": 0.0488, + "num_input_tokens_seen": 827471972, + "step": 8534 + }, + { + "epoch": 0.8343762221353148, + "loss": 0.03406146541237831, + "loss_ce": 0.001911196974106133, + "loss_iou": 0.32421875, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 827471972, + "step": 8534 + }, + { + "epoch": 0.8344739929605006, + "grad_norm": 5.9360457556632635, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 827569528, + "step": 8535 + }, + { + "epoch": 0.8344739929605006, + "loss": 0.08124160021543503, + "loss_ce": 0.004657734651118517, + "loss_iou": 0.2255859375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 827569528, + "step": 8535 + }, + { + "epoch": 0.8345717637856863, + "grad_norm": 3.099152077615673, + "learning_rate": 5e-05, + "loss": 0.0416, + "num_input_tokens_seen": 827666388, + "step": 8536 + }, + { + "epoch": 0.8345717637856863, + "loss": 0.04487022012472153, + "loss_ce": 0.006921611726284027, + "loss_iou": 0.328125, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 827666388, + "step": 8536 + }, + { + "epoch": 0.8346695346108721, + "grad_norm": 10.853251928455347, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 827762960, + "step": 8537 + }, + { + "epoch": 0.8346695346108721, + "loss": 0.10736696422100067, + "loss_ce": 0.005682398099452257, + "loss_iou": 0.2138671875, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 827762960, + "step": 8537 + }, + { + "epoch": 0.8347673054360579, + "grad_norm": 7.0714553875766155, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 827859496, + "step": 8538 + }, + { + "epoch": 0.8347673054360579, + "loss": 0.06290938705205917, + "loss_ce": 0.004143972881138325, + "loss_iou": 0.2412109375, + "loss_num": 0.01171875, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 827859496, + "step": 8538 + }, + { + "epoch": 0.8348650762612436, + "grad_norm": 12.270694491306834, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 827956128, + "step": 8539 + }, + { + "epoch": 0.8348650762612436, + "loss": 0.054891519248485565, + "loss_ce": 0.0037554986774921417, + "loss_iou": 0.2099609375, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 827956128, + "step": 8539 + }, + { + "epoch": 0.8349628470864294, + "grad_norm": 6.816767324474553, + "learning_rate": 5e-05, + "loss": 0.1128, + "num_input_tokens_seen": 828053172, + "step": 8540 + }, + { + "epoch": 0.8349628470864294, + "loss": 0.056874558329582214, + "loss_ce": 0.0031483641359955072, + "loss_iou": 0.2216796875, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 828053172, + "step": 8540 + }, + { + "epoch": 0.8350606179116151, + "grad_norm": 6.664777842783001, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 828150008, + "step": 8541 + }, + { + "epoch": 0.8350606179116151, + "loss": 0.06711383163928986, + "loss_ce": 0.005628542043268681, + "loss_iou": 0.298828125, + "loss_num": 0.01226806640625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 828150008, + "step": 8541 + }, + { + "epoch": 0.8351583887368009, + "grad_norm": 15.537579814510948, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 828246076, + "step": 8542 + }, + { + "epoch": 0.8351583887368009, + "loss": 0.1251235008239746, + "loss_ce": 0.004212864208966494, + "loss_iou": 0.34765625, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 828246076, + "step": 8542 + }, + { + "epoch": 0.8352561595619867, + "grad_norm": 18.0661672238276, + "learning_rate": 5e-05, + "loss": 0.0812, + "num_input_tokens_seen": 828343892, + "step": 8543 + }, + { + "epoch": 0.8352561595619867, + "loss": 0.08308132737874985, + "loss_ce": 0.005940518341958523, + "loss_iou": 0.310546875, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 828343892, + "step": 8543 + }, + { + "epoch": 0.8353539303871724, + "grad_norm": 11.887571459060943, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 828440652, + "step": 8544 + }, + { + "epoch": 0.8353539303871724, + "loss": 0.09944434463977814, + "loss_ce": 0.007464362308382988, + "loss_iou": 0.32421875, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 828440652, + "step": 8544 + }, + { + "epoch": 0.8354517012123582, + "grad_norm": 5.061485110318639, + "learning_rate": 5e-05, + "loss": 0.0598, + "num_input_tokens_seen": 828537396, + "step": 8545 + }, + { + "epoch": 0.8354517012123582, + "loss": 0.06237722933292389, + "loss_ce": 0.006957304198294878, + "loss_iou": 0.330078125, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 828537396, + "step": 8545 + }, + { + "epoch": 0.835549472037544, + "grad_norm": 9.365603917455871, + "learning_rate": 5e-05, + "loss": 0.0989, + "num_input_tokens_seen": 828634564, + "step": 8546 + }, + { + "epoch": 0.835549472037544, + "loss": 0.07322704792022705, + "loss_ce": 0.005050774198025465, + "loss_iou": 0.2734375, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 828634564, + "step": 8546 + }, + { + "epoch": 0.8356472428627297, + "grad_norm": 10.278247179122237, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 828731824, + "step": 8547 + }, + { + "epoch": 0.8356472428627297, + "loss": 0.07230037450790405, + "loss_ce": 0.004280507564544678, + "loss_iou": 0.28125, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 828731824, + "step": 8547 + }, + { + "epoch": 0.8357450136879155, + "grad_norm": 2.7580066456652266, + "learning_rate": 5e-05, + "loss": 0.0504, + "num_input_tokens_seen": 828828748, + "step": 8548 + }, + { + "epoch": 0.8357450136879155, + "loss": 0.07018977403640747, + "loss_ce": 0.0046837893314659595, + "loss_iou": 0.294921875, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 828828748, + "step": 8548 + }, + { + "epoch": 0.8358427845131013, + "grad_norm": 1.5554704273155893, + "learning_rate": 5e-05, + "loss": 0.1242, + "num_input_tokens_seen": 828925320, + "step": 8549 + }, + { + "epoch": 0.8358427845131013, + "loss": 0.11661741137504578, + "loss_ce": 0.005472391843795776, + "loss_iou": 0.265625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 828925320, + "step": 8549 + }, + { + "epoch": 0.835940555338287, + "grad_norm": 7.3897832210708065, + "learning_rate": 5e-05, + "loss": 0.0988, + "num_input_tokens_seen": 829022680, + "step": 8550 + }, + { + "epoch": 0.835940555338287, + "loss": 0.10763110220432281, + "loss_ce": 0.0031541711650788784, + "loss_iou": 0.302734375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 829022680, + "step": 8550 + }, + { + "epoch": 0.8360383261634728, + "grad_norm": 8.70520208240572, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 829119068, + "step": 8551 + }, + { + "epoch": 0.8360383261634728, + "loss": 0.060486748814582825, + "loss_ce": 0.008938746526837349, + "loss_iou": 0.28515625, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 829119068, + "step": 8551 + }, + { + "epoch": 0.8361360969886585, + "grad_norm": 4.280401014388187, + "learning_rate": 5e-05, + "loss": 0.1085, + "num_input_tokens_seen": 829216148, + "step": 8552 + }, + { + "epoch": 0.8361360969886585, + "loss": 0.08952628076076508, + "loss_ce": 0.008441075682640076, + "loss_iou": 0.302734375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 829216148, + "step": 8552 + }, + { + "epoch": 0.8362338678138443, + "grad_norm": 8.48231101283964, + "learning_rate": 5e-05, + "loss": 0.0912, + "num_input_tokens_seen": 829313848, + "step": 8553 + }, + { + "epoch": 0.8362338678138443, + "loss": 0.10488374531269073, + "loss_ce": 0.008707595989108086, + "loss_iou": 0.330078125, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 829313848, + "step": 8553 + }, + { + "epoch": 0.8363316386390302, + "grad_norm": 13.28768321116386, + "learning_rate": 5e-05, + "loss": 0.0697, + "num_input_tokens_seen": 829410780, + "step": 8554 + }, + { + "epoch": 0.8363316386390302, + "loss": 0.06329850852489471, + "loss_ce": 0.004170704632997513, + "loss_iou": 0.33984375, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 829410780, + "step": 8554 + }, + { + "epoch": 0.8364294094642158, + "grad_norm": 13.688230367189428, + "learning_rate": 5e-05, + "loss": 0.0695, + "num_input_tokens_seen": 829506996, + "step": 8555 + }, + { + "epoch": 0.8364294094642158, + "loss": 0.05297939479351044, + "loss_ce": 0.007225915789604187, + "loss_iou": 0.19140625, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 829506996, + "step": 8555 + }, + { + "epoch": 0.8365271802894017, + "grad_norm": 5.654759914048482, + "learning_rate": 5e-05, + "loss": 0.0508, + "num_input_tokens_seen": 829603260, + "step": 8556 + }, + { + "epoch": 0.8365271802894017, + "loss": 0.07334505766630173, + "loss_ce": 0.00869356282055378, + "loss_iou": 0.2890625, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 829603260, + "step": 8556 + }, + { + "epoch": 0.8366249511145875, + "grad_norm": 14.442426029719606, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 829700460, + "step": 8557 + }, + { + "epoch": 0.8366249511145875, + "loss": 0.07701483368873596, + "loss_ce": 0.0028418605215847492, + "loss_iou": 0.34375, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 829700460, + "step": 8557 + }, + { + "epoch": 0.8367227219397732, + "grad_norm": 5.582556122589799, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 829797352, + "step": 8558 + }, + { + "epoch": 0.8367227219397732, + "loss": 0.10060539096593857, + "loss_ce": 0.012165449559688568, + "loss_iou": 0.28125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 829797352, + "step": 8558 + }, + { + "epoch": 0.836820492764959, + "grad_norm": 3.1962310373797753, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 829894460, + "step": 8559 + }, + { + "epoch": 0.836820492764959, + "loss": 0.07279537618160248, + "loss_ce": 0.00616787513718009, + "loss_iou": 0.1728515625, + "loss_num": 0.01336669921875, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 829894460, + "step": 8559 + }, + { + "epoch": 0.8369182635901447, + "grad_norm": 7.5069576772810525, + "learning_rate": 5e-05, + "loss": 0.0519, + "num_input_tokens_seen": 829991024, + "step": 8560 + }, + { + "epoch": 0.8369182635901447, + "loss": 0.04390161484479904, + "loss_ce": 0.004259278532117605, + "loss_iou": 0.2392578125, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 829991024, + "step": 8560 + }, + { + "epoch": 0.8370160344153305, + "grad_norm": 3.5189864433423366, + "learning_rate": 5e-05, + "loss": 0.0572, + "num_input_tokens_seen": 830087588, + "step": 8561 + }, + { + "epoch": 0.8370160344153305, + "loss": 0.035482969135046005, + "loss_ce": 0.0027223494835197926, + "loss_iou": 0.2412109375, + "loss_num": 0.006561279296875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 830087588, + "step": 8561 + }, + { + "epoch": 0.8371138052405163, + "grad_norm": 13.290002223581958, + "learning_rate": 5e-05, + "loss": 0.1029, + "num_input_tokens_seen": 830183864, + "step": 8562 + }, + { + "epoch": 0.8371138052405163, + "loss": 0.08973950147628784, + "loss_ce": 0.007616707123816013, + "loss_iou": 0.265625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 830183864, + "step": 8562 + }, + { + "epoch": 0.837211576065702, + "grad_norm": 25.73551381050774, + "learning_rate": 5e-05, + "loss": 0.138, + "num_input_tokens_seen": 830280768, + "step": 8563 + }, + { + "epoch": 0.837211576065702, + "loss": 0.18222562968730927, + "loss_ce": 0.014165337197482586, + "loss_iou": 0.33203125, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 830280768, + "step": 8563 + }, + { + "epoch": 0.8373093468908878, + "grad_norm": 27.660532880130937, + "learning_rate": 5e-05, + "loss": 0.0863, + "num_input_tokens_seen": 830377664, + "step": 8564 + }, + { + "epoch": 0.8373093468908878, + "loss": 0.09471221268177032, + "loss_ce": 0.012543638236820698, + "loss_iou": 0.2216796875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 830377664, + "step": 8564 + }, + { + "epoch": 0.8374071177160736, + "grad_norm": 17.66861810298703, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 830474248, + "step": 8565 + }, + { + "epoch": 0.8374071177160736, + "loss": 0.06939081102609634, + "loss_ce": 0.007150213234126568, + "loss_iou": 0.2734375, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 830474248, + "step": 8565 + }, + { + "epoch": 0.8375048885412593, + "grad_norm": 9.851225668876022, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 830571912, + "step": 8566 + }, + { + "epoch": 0.8375048885412593, + "loss": 0.06420384347438812, + "loss_ce": 0.00557957636192441, + "loss_iou": 0.314453125, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 830571912, + "step": 8566 + }, + { + "epoch": 0.8376026593664451, + "grad_norm": 14.96080555730548, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 830668748, + "step": 8567 + }, + { + "epoch": 0.8376026593664451, + "loss": 0.0980309396982193, + "loss_ce": 0.007103816606104374, + "loss_iou": 0.255859375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 830668748, + "step": 8567 + }, + { + "epoch": 0.8377004301916308, + "grad_norm": 17.18760385982697, + "learning_rate": 5e-05, + "loss": 0.1163, + "num_input_tokens_seen": 830766188, + "step": 8568 + }, + { + "epoch": 0.8377004301916308, + "loss": 0.1555279642343521, + "loss_ce": 0.004191294778138399, + "loss_iou": 0.349609375, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 830766188, + "step": 8568 + }, + { + "epoch": 0.8377982010168166, + "grad_norm": 9.261551394525766, + "learning_rate": 5e-05, + "loss": 0.0726, + "num_input_tokens_seen": 830862444, + "step": 8569 + }, + { + "epoch": 0.8377982010168166, + "loss": 0.07177465409040451, + "loss_ce": 0.0018512557726353407, + "loss_iou": 0.244140625, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 830862444, + "step": 8569 + }, + { + "epoch": 0.8378959718420024, + "grad_norm": 6.491719250862728, + "learning_rate": 5e-05, + "loss": 0.0468, + "num_input_tokens_seen": 830958968, + "step": 8570 + }, + { + "epoch": 0.8378959718420024, + "loss": 0.03932343050837517, + "loss_ce": 0.007997137494385242, + "loss_iou": 0.2021484375, + "loss_num": 0.00628662109375, + "loss_xval": 0.03125, + "num_input_tokens_seen": 830958968, + "step": 8570 + }, + { + "epoch": 0.8379937426671881, + "grad_norm": 32.880039044029274, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 831055832, + "step": 8571 + }, + { + "epoch": 0.8379937426671881, + "loss": 0.08614514023065567, + "loss_ce": 0.0038087156135588884, + "loss_iou": 0.30078125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 831055832, + "step": 8571 + }, + { + "epoch": 0.8380915134923739, + "grad_norm": 27.197271490933744, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 831152264, + "step": 8572 + }, + { + "epoch": 0.8380915134923739, + "loss": 0.08052971959114075, + "loss_ce": 0.007730034179985523, + "loss_iou": 0.306640625, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 831152264, + "step": 8572 + }, + { + "epoch": 0.8381892843175597, + "grad_norm": 5.047181595836587, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 831249344, + "step": 8573 + }, + { + "epoch": 0.8381892843175597, + "loss": 0.08807625621557236, + "loss_ce": 0.004152918234467506, + "loss_iou": 0.359375, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 831249344, + "step": 8573 + }, + { + "epoch": 0.8382870551427454, + "grad_norm": 3.2285117207727256, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 831346444, + "step": 8574 + }, + { + "epoch": 0.8382870551427454, + "loss": 0.06752964854240417, + "loss_ce": 0.0034503648057579994, + "loss_iou": 0.26953125, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 831346444, + "step": 8574 + }, + { + "epoch": 0.8383848259679312, + "grad_norm": 5.897830302708951, + "learning_rate": 5e-05, + "loss": 0.0626, + "num_input_tokens_seen": 831442796, + "step": 8575 + }, + { + "epoch": 0.8383848259679312, + "loss": 0.060925740748643875, + "loss_ce": 0.00565840769559145, + "loss_iou": 0.166015625, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 831442796, + "step": 8575 + }, + { + "epoch": 0.838482596793117, + "grad_norm": 4.709713407003269, + "learning_rate": 5e-05, + "loss": 0.0633, + "num_input_tokens_seen": 831538884, + "step": 8576 + }, + { + "epoch": 0.838482596793117, + "loss": 0.08461551368236542, + "loss_ce": 0.005403328221291304, + "loss_iou": 0.1787109375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 831538884, + "step": 8576 + }, + { + "epoch": 0.8385803676183027, + "grad_norm": 11.053147289157662, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 831637044, + "step": 8577 + }, + { + "epoch": 0.8385803676183027, + "loss": 0.05662420392036438, + "loss_ce": 0.006117610726505518, + "loss_iou": 0.3046875, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 831637044, + "step": 8577 + }, + { + "epoch": 0.8386781384434885, + "grad_norm": 3.294955111813545, + "learning_rate": 5e-05, + "loss": 0.0899, + "num_input_tokens_seen": 831733184, + "step": 8578 + }, + { + "epoch": 0.8386781384434885, + "loss": 0.07137821614742279, + "loss_ce": 0.00843571126461029, + "loss_iou": 0.2255859375, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 831733184, + "step": 8578 + }, + { + "epoch": 0.8387759092686742, + "grad_norm": 14.803730022672926, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 831830232, + "step": 8579 + }, + { + "epoch": 0.8387759092686742, + "loss": 0.0848446786403656, + "loss_ce": 0.003530590794980526, + "loss_iou": 0.353515625, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 831830232, + "step": 8579 + }, + { + "epoch": 0.83887368009386, + "grad_norm": 18.463005546852298, + "learning_rate": 5e-05, + "loss": 0.0754, + "num_input_tokens_seen": 831927616, + "step": 8580 + }, + { + "epoch": 0.83887368009386, + "loss": 0.09013287723064423, + "loss_ce": 0.0054618557915091515, + "loss_iou": 0.2041015625, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 831927616, + "step": 8580 + }, + { + "epoch": 0.8389714509190458, + "grad_norm": 6.171140041149457, + "learning_rate": 5e-05, + "loss": 0.1052, + "num_input_tokens_seen": 832024484, + "step": 8581 + }, + { + "epoch": 0.8389714509190458, + "loss": 0.12730327248573303, + "loss_ce": 0.005110898055136204, + "loss_iou": 0.24609375, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 832024484, + "step": 8581 + }, + { + "epoch": 0.8390692217442315, + "grad_norm": 6.553163311399546, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 832121400, + "step": 8582 + }, + { + "epoch": 0.8390692217442315, + "loss": 0.05421909689903259, + "loss_ce": 0.004032939672470093, + "loss_iou": 0.2890625, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 832121400, + "step": 8582 + }, + { + "epoch": 0.8391669925694173, + "grad_norm": 8.032959749127553, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 832219372, + "step": 8583 + }, + { + "epoch": 0.8391669925694173, + "loss": 0.0730614960193634, + "loss_ce": 0.00303891533985734, + "loss_iou": 0.294921875, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 832219372, + "step": 8583 + }, + { + "epoch": 0.8392647633946031, + "grad_norm": 9.84805815908008, + "learning_rate": 5e-05, + "loss": 0.0961, + "num_input_tokens_seen": 832315672, + "step": 8584 + }, + { + "epoch": 0.8392647633946031, + "loss": 0.10259830951690674, + "loss_ce": 0.004316451493650675, + "loss_iou": 0.322265625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 832315672, + "step": 8584 + }, + { + "epoch": 0.8393625342197888, + "grad_norm": 13.482649929604166, + "learning_rate": 5e-05, + "loss": 0.0729, + "num_input_tokens_seen": 832412812, + "step": 8585 + }, + { + "epoch": 0.8393625342197888, + "loss": 0.07936225086450577, + "loss_ce": 0.005509715527296066, + "loss_iou": 0.29296875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 832412812, + "step": 8585 + }, + { + "epoch": 0.8394603050449746, + "grad_norm": 8.091743728063062, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 832510820, + "step": 8586 + }, + { + "epoch": 0.8394603050449746, + "loss": 0.08721549808979034, + "loss_ce": 0.004268720280379057, + "loss_iou": 0.376953125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 832510820, + "step": 8586 + }, + { + "epoch": 0.8395580758701603, + "grad_norm": 7.891609571070373, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 832607000, + "step": 8587 + }, + { + "epoch": 0.8395580758701603, + "loss": 0.09503430128097534, + "loss_ce": 0.0045191626995801926, + "loss_iou": 0.30078125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 832607000, + "step": 8587 + }, + { + "epoch": 0.8396558466953461, + "grad_norm": 8.768599976885419, + "learning_rate": 5e-05, + "loss": 0.053, + "num_input_tokens_seen": 832703436, + "step": 8588 + }, + { + "epoch": 0.8396558466953461, + "loss": 0.035319991409778595, + "loss_ce": 0.0027195895090699196, + "loss_iou": 0.2392578125, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 832703436, + "step": 8588 + }, + { + "epoch": 0.8397536175205319, + "grad_norm": 4.429973104006323, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 832799460, + "step": 8589 + }, + { + "epoch": 0.8397536175205319, + "loss": 0.10221529006958008, + "loss_ce": 0.007229332812130451, + "loss_iou": 0.1357421875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 832799460, + "step": 8589 + }, + { + "epoch": 0.8398513883457176, + "grad_norm": 6.862488437210941, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 832896736, + "step": 8590 + }, + { + "epoch": 0.8398513883457176, + "loss": 0.09270001202821732, + "loss_ce": 0.0025053066201508045, + "loss_iou": 0.32421875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 832896736, + "step": 8590 + }, + { + "epoch": 0.8399491591709034, + "grad_norm": 13.547737328501668, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 832993788, + "step": 8591 + }, + { + "epoch": 0.8399491591709034, + "loss": 0.11583255231380463, + "loss_ce": 0.009799224324524403, + "loss_iou": 0.1884765625, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 832993788, + "step": 8591 + }, + { + "epoch": 0.8400469299960892, + "grad_norm": 8.659494690075311, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 833090124, + "step": 8592 + }, + { + "epoch": 0.8400469299960892, + "loss": 0.0688270628452301, + "loss_ce": 0.005510719493031502, + "loss_iou": 0.314453125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 833090124, + "step": 8592 + }, + { + "epoch": 0.8401447008212749, + "grad_norm": 4.171299372632021, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 833187360, + "step": 8593 + }, + { + "epoch": 0.8401447008212749, + "loss": 0.051424648612737656, + "loss_ce": 0.0031153224408626556, + "loss_iou": 0.28125, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 833187360, + "step": 8593 + }, + { + "epoch": 0.8402424716464607, + "grad_norm": 6.249429533007954, + "learning_rate": 5e-05, + "loss": 0.0503, + "num_input_tokens_seen": 833284624, + "step": 8594 + }, + { + "epoch": 0.8402424716464607, + "loss": 0.037598565220832825, + "loss_ce": 0.0015420459676533937, + "loss_iou": 0.24609375, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 833284624, + "step": 8594 + }, + { + "epoch": 0.8403402424716465, + "grad_norm": 20.113087393887966, + "learning_rate": 5e-05, + "loss": 0.0552, + "num_input_tokens_seen": 833382096, + "step": 8595 + }, + { + "epoch": 0.8403402424716465, + "loss": 0.05185864493250847, + "loss_ce": 0.006499988026916981, + "loss_iou": 0.2333984375, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 833382096, + "step": 8595 + }, + { + "epoch": 0.8404380132968322, + "grad_norm": 12.25301392156476, + "learning_rate": 5e-05, + "loss": 0.0918, + "num_input_tokens_seen": 833478448, + "step": 8596 + }, + { + "epoch": 0.8404380132968322, + "loss": 0.08289570361375809, + "loss_ce": 0.005297133699059486, + "loss_iou": 0.421875, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 833478448, + "step": 8596 + }, + { + "epoch": 0.840535784122018, + "grad_norm": 6.003880778644608, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 833574604, + "step": 8597 + }, + { + "epoch": 0.840535784122018, + "loss": 0.0725952535867691, + "loss_ce": 0.006448397878557444, + "loss_iou": 0.2333984375, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 833574604, + "step": 8597 + }, + { + "epoch": 0.8406335549472037, + "grad_norm": 6.603278315193073, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 833671208, + "step": 8598 + }, + { + "epoch": 0.8406335549472037, + "loss": 0.07323452830314636, + "loss_ce": 0.00833127647638321, + "loss_iou": 0.296875, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 833671208, + "step": 8598 + }, + { + "epoch": 0.8407313257723895, + "grad_norm": 4.085183840721522, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 833768360, + "step": 8599 + }, + { + "epoch": 0.8407313257723895, + "loss": 0.057113416492938995, + "loss_ce": 0.005706556141376495, + "loss_iou": 0.263671875, + "loss_num": 0.01025390625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 833768360, + "step": 8599 + }, + { + "epoch": 0.8408290965975753, + "grad_norm": 8.899519317574192, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 833865292, + "step": 8600 + }, + { + "epoch": 0.8408290965975753, + "loss": 0.0530618280172348, + "loss_ce": 0.002387389773502946, + "loss_iou": 0.361328125, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 833865292, + "step": 8600 + }, + { + "epoch": 0.840926867422761, + "grad_norm": 5.387363778604739, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 833962124, + "step": 8601 + }, + { + "epoch": 0.840926867422761, + "loss": 0.07670298218727112, + "loss_ce": 0.004666234366595745, + "loss_iou": 0.2421875, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 833962124, + "step": 8601 + }, + { + "epoch": 0.8410246382479468, + "grad_norm": 11.937497335770123, + "learning_rate": 5e-05, + "loss": 0.1093, + "num_input_tokens_seen": 834058816, + "step": 8602 + }, + { + "epoch": 0.8410246382479468, + "loss": 0.11764054000377655, + "loss_ce": 0.003359833499416709, + "loss_iou": 0.3359375, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 834058816, + "step": 8602 + }, + { + "epoch": 0.8411224090731326, + "grad_norm": 4.893847289196819, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 834155976, + "step": 8603 + }, + { + "epoch": 0.8411224090731326, + "loss": 0.09788784384727478, + "loss_ce": 0.0032375743612647057, + "loss_iou": 0.275390625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 834155976, + "step": 8603 + }, + { + "epoch": 0.8412201798983183, + "grad_norm": 11.712057454648766, + "learning_rate": 5e-05, + "loss": 0.1039, + "num_input_tokens_seen": 834252824, + "step": 8604 + }, + { + "epoch": 0.8412201798983183, + "loss": 0.04906081408262253, + "loss_ce": 0.0035819937475025654, + "loss_iou": 0.275390625, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 834252824, + "step": 8604 + }, + { + "epoch": 0.8413179507235041, + "grad_norm": 4.230090001537895, + "learning_rate": 5e-05, + "loss": 0.1022, + "num_input_tokens_seen": 834350164, + "step": 8605 + }, + { + "epoch": 0.8413179507235041, + "loss": 0.10584791004657745, + "loss_ce": 0.009046153165400028, + "loss_iou": 0.265625, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 834350164, + "step": 8605 + }, + { + "epoch": 0.8414157215486898, + "grad_norm": 1.6557486996006332, + "learning_rate": 5e-05, + "loss": 0.0983, + "num_input_tokens_seen": 834446440, + "step": 8606 + }, + { + "epoch": 0.8414157215486898, + "loss": 0.11657659709453583, + "loss_ce": 0.004638124257326126, + "loss_iou": 0.26953125, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 834446440, + "step": 8606 + }, + { + "epoch": 0.8415134923738756, + "grad_norm": 2.6459810343331833, + "learning_rate": 5e-05, + "loss": 0.0539, + "num_input_tokens_seen": 834543252, + "step": 8607 + }, + { + "epoch": 0.8415134923738756, + "loss": 0.03339535742998123, + "loss_ce": 0.005021636839956045, + "loss_iou": 0.21875, + "loss_num": 0.00567626953125, + "loss_xval": 0.0283203125, + "num_input_tokens_seen": 834543252, + "step": 8607 + }, + { + "epoch": 0.8416112631990614, + "grad_norm": 6.8926635632143745, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 834640512, + "step": 8608 + }, + { + "epoch": 0.8416112631990614, + "loss": 0.08727747201919556, + "loss_ce": 0.0037203466054052114, + "loss_iou": 0.21484375, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 834640512, + "step": 8608 + }, + { + "epoch": 0.8417090340242471, + "grad_norm": 17.554262104671736, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 834738272, + "step": 8609 + }, + { + "epoch": 0.8417090340242471, + "loss": 0.08351487666368484, + "loss_ce": 0.003162093460559845, + "loss_iou": 0.265625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 834738272, + "step": 8609 + }, + { + "epoch": 0.8418068048494329, + "grad_norm": 11.794947732721386, + "learning_rate": 5e-05, + "loss": 0.0517, + "num_input_tokens_seen": 834834844, + "step": 8610 + }, + { + "epoch": 0.8418068048494329, + "loss": 0.04325712099671364, + "loss_ce": 0.0027984431944787502, + "loss_iou": 0.30859375, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 834834844, + "step": 8610 + }, + { + "epoch": 0.8419045756746187, + "grad_norm": 14.11474202330235, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 834931856, + "step": 8611 + }, + { + "epoch": 0.8419045756746187, + "loss": 0.05732189863920212, + "loss_ce": 0.008829467929899693, + "loss_iou": 0.255859375, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 834931856, + "step": 8611 + }, + { + "epoch": 0.8420023464998044, + "grad_norm": 3.853981047069926, + "learning_rate": 5e-05, + "loss": 0.1127, + "num_input_tokens_seen": 835028548, + "step": 8612 + }, + { + "epoch": 0.8420023464998044, + "loss": 0.10918353497982025, + "loss_ce": 0.005698423832654953, + "loss_iou": 0.283203125, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 835028548, + "step": 8612 + }, + { + "epoch": 0.8421001173249902, + "grad_norm": 7.1591832818562535, + "learning_rate": 5e-05, + "loss": 0.0531, + "num_input_tokens_seen": 835126244, + "step": 8613 + }, + { + "epoch": 0.8421001173249902, + "loss": 0.06364219635725021, + "loss_ce": 0.005414657760411501, + "loss_iou": 0.37890625, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 835126244, + "step": 8613 + }, + { + "epoch": 0.8421978881501759, + "grad_norm": 14.910139763315039, + "learning_rate": 5e-05, + "loss": 0.0946, + "num_input_tokens_seen": 835220844, + "step": 8614 + }, + { + "epoch": 0.8421978881501759, + "loss": 0.10729604214429855, + "loss_ce": 0.009113364852964878, + "loss_iou": 0.3125, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 835220844, + "step": 8614 + }, + { + "epoch": 0.8422956589753617, + "grad_norm": 18.326523680495434, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 835317456, + "step": 8615 + }, + { + "epoch": 0.8422956589753617, + "loss": 0.07725018262863159, + "loss_ce": 0.0029246218036860228, + "loss_iou": 0.3203125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 835317456, + "step": 8615 + }, + { + "epoch": 0.8423934298005475, + "grad_norm": 14.139348325952628, + "learning_rate": 5e-05, + "loss": 0.0501, + "num_input_tokens_seen": 835414408, + "step": 8616 + }, + { + "epoch": 0.8423934298005475, + "loss": 0.05677168816328049, + "loss_ce": 0.002763204975053668, + "loss_iou": 0.27734375, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 835414408, + "step": 8616 + }, + { + "epoch": 0.8424912006257332, + "grad_norm": 5.678835457174395, + "learning_rate": 5e-05, + "loss": 0.09, + "num_input_tokens_seen": 835511464, + "step": 8617 + }, + { + "epoch": 0.8424912006257332, + "loss": 0.09153281897306442, + "loss_ce": 0.006587138865143061, + "loss_iou": 0.3359375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 835511464, + "step": 8617 + }, + { + "epoch": 0.842588971450919, + "grad_norm": 4.674937017847556, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 835608264, + "step": 8618 + }, + { + "epoch": 0.842588971450919, + "loss": 0.05163707584142685, + "loss_ce": 0.004914662800729275, + "loss_iou": 0.359375, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 835608264, + "step": 8618 + }, + { + "epoch": 0.8426867422761048, + "grad_norm": 10.161564277341235, + "learning_rate": 5e-05, + "loss": 0.0975, + "num_input_tokens_seen": 835706212, + "step": 8619 + }, + { + "epoch": 0.8426867422761048, + "loss": 0.10251116752624512, + "loss_ce": 0.005770446732640266, + "loss_iou": 0.3671875, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 835706212, + "step": 8619 + }, + { + "epoch": 0.8427845131012905, + "grad_norm": 8.214661456752209, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 835802524, + "step": 8620 + }, + { + "epoch": 0.8427845131012905, + "loss": 0.07875170558691025, + "loss_ce": 0.008973261341452599, + "loss_iou": 0.359375, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 835802524, + "step": 8620 + }, + { + "epoch": 0.8428822839264764, + "grad_norm": 12.421673177233682, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 835899224, + "step": 8621 + }, + { + "epoch": 0.8428822839264764, + "loss": 0.07441405951976776, + "loss_ce": 0.006169122643768787, + "loss_iou": 0.24609375, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 835899224, + "step": 8621 + }, + { + "epoch": 0.8429800547516622, + "grad_norm": 10.823233777689431, + "learning_rate": 5e-05, + "loss": 0.0504, + "num_input_tokens_seen": 835995796, + "step": 8622 + }, + { + "epoch": 0.8429800547516622, + "loss": 0.05191866308450699, + "loss_ce": 0.005348839797079563, + "loss_iou": 0.28515625, + "loss_num": 0.00927734375, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 835995796, + "step": 8622 + }, + { + "epoch": 0.8430778255768479, + "grad_norm": 9.56329129937554, + "learning_rate": 5e-05, + "loss": 0.0575, + "num_input_tokens_seen": 836092176, + "step": 8623 + }, + { + "epoch": 0.8430778255768479, + "loss": 0.05014895647764206, + "loss_ce": 0.004296293016523123, + "loss_iou": 0.30078125, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 836092176, + "step": 8623 + }, + { + "epoch": 0.8431755964020337, + "grad_norm": 4.104077547363998, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 836188628, + "step": 8624 + }, + { + "epoch": 0.8431755964020337, + "loss": 0.07071839272975922, + "loss_ce": 0.004556282423436642, + "loss_iou": 0.154296875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 836188628, + "step": 8624 + }, + { + "epoch": 0.8432733672272194, + "grad_norm": 3.0983417783748206, + "learning_rate": 5e-05, + "loss": 0.0685, + "num_input_tokens_seen": 836285316, + "step": 8625 + }, + { + "epoch": 0.8432733672272194, + "loss": 0.059464242309331894, + "loss_ce": 0.005490091163665056, + "loss_iou": 0.2490234375, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 836285316, + "step": 8625 + }, + { + "epoch": 0.8433711380524052, + "grad_norm": 3.7414886715676605, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 836382588, + "step": 8626 + }, + { + "epoch": 0.8433711380524052, + "loss": 0.07473303377628326, + "loss_ce": 0.007525690831243992, + "loss_iou": 0.27734375, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 836382588, + "step": 8626 + }, + { + "epoch": 0.843468908877591, + "grad_norm": 5.3039304024989, + "learning_rate": 5e-05, + "loss": 0.092, + "num_input_tokens_seen": 836479728, + "step": 8627 + }, + { + "epoch": 0.843468908877591, + "loss": 0.05389489233493805, + "loss_ce": 0.0032433453015983105, + "loss_iou": 0.306640625, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 836479728, + "step": 8627 + }, + { + "epoch": 0.8435666797027767, + "grad_norm": 6.908535511390692, + "learning_rate": 5e-05, + "loss": 0.0446, + "num_input_tokens_seen": 836577644, + "step": 8628 + }, + { + "epoch": 0.8435666797027767, + "loss": 0.045606546103954315, + "loss_ce": 0.005659036338329315, + "loss_iou": 0.27734375, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 836577644, + "step": 8628 + }, + { + "epoch": 0.8436644505279625, + "grad_norm": 5.649821105327791, + "learning_rate": 5e-05, + "loss": 0.071, + "num_input_tokens_seen": 836674696, + "step": 8629 + }, + { + "epoch": 0.8436644505279625, + "loss": 0.05699232593178749, + "loss_ce": 0.005522528197616339, + "loss_iou": 0.1806640625, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 836674696, + "step": 8629 + }, + { + "epoch": 0.8437622213531483, + "grad_norm": 7.411232590306828, + "learning_rate": 5e-05, + "loss": 0.0596, + "num_input_tokens_seen": 836771908, + "step": 8630 + }, + { + "epoch": 0.8437622213531483, + "loss": 0.050650984048843384, + "loss_ce": 0.0032953317277133465, + "loss_iou": 0.279296875, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 836771908, + "step": 8630 + }, + { + "epoch": 0.843859992178334, + "grad_norm": 6.029028382624504, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 836868836, + "step": 8631 + }, + { + "epoch": 0.843859992178334, + "loss": 0.05040246993303299, + "loss_ce": 0.004061521030962467, + "loss_iou": 0.28515625, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 836868836, + "step": 8631 + }, + { + "epoch": 0.8439577630035198, + "grad_norm": 5.296702923570372, + "learning_rate": 5e-05, + "loss": 0.0465, + "num_input_tokens_seen": 836965900, + "step": 8632 + }, + { + "epoch": 0.8439577630035198, + "loss": 0.032366856932640076, + "loss_ce": 0.0036574448458850384, + "loss_iou": 0.26171875, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 836965900, + "step": 8632 + }, + { + "epoch": 0.8440555338287055, + "grad_norm": 2.9782017370419003, + "learning_rate": 5e-05, + "loss": 0.0478, + "num_input_tokens_seen": 837062384, + "step": 8633 + }, + { + "epoch": 0.8440555338287055, + "loss": 0.03497515618801117, + "loss_ce": 0.004915339406579733, + "loss_iou": 0.2578125, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 837062384, + "step": 8633 + }, + { + "epoch": 0.8441533046538913, + "grad_norm": 46.490093145883115, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 837158856, + "step": 8634 + }, + { + "epoch": 0.8441533046538913, + "loss": 0.08590669184923172, + "loss_ce": 0.0021817132364958525, + "loss_iou": 0.314453125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 837158856, + "step": 8634 + }, + { + "epoch": 0.8442510754790771, + "grad_norm": 11.599362115802991, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 837255492, + "step": 8635 + }, + { + "epoch": 0.8442510754790771, + "loss": 0.06458999216556549, + "loss_ce": 0.005382077302783728, + "loss_iou": 0.267578125, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 837255492, + "step": 8635 + }, + { + "epoch": 0.8443488463042628, + "grad_norm": 15.834883141906671, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 837351368, + "step": 8636 + }, + { + "epoch": 0.8443488463042628, + "loss": 0.10476750135421753, + "loss_ce": 0.0074316831305623055, + "loss_iou": 0.2451171875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 837351368, + "step": 8636 + }, + { + "epoch": 0.8444466171294486, + "grad_norm": 49.367345800645175, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 837448392, + "step": 8637 + }, + { + "epoch": 0.8444466171294486, + "loss": 0.10670268535614014, + "loss_ce": 0.009260062128305435, + "loss_iou": 0.1953125, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 837448392, + "step": 8637 + }, + { + "epoch": 0.8445443879546344, + "grad_norm": 54.33430470058373, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 837546208, + "step": 8638 + }, + { + "epoch": 0.8445443879546344, + "loss": 0.06882180273532867, + "loss_ce": 0.004917996935546398, + "loss_iou": 0.357421875, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 837546208, + "step": 8638 + }, + { + "epoch": 0.8446421587798201, + "grad_norm": 12.31821195072192, + "learning_rate": 5e-05, + "loss": 0.0771, + "num_input_tokens_seen": 837642576, + "step": 8639 + }, + { + "epoch": 0.8446421587798201, + "loss": 0.06871478259563446, + "loss_ce": 0.008717223070561886, + "loss_iou": 0.326171875, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 837642576, + "step": 8639 + }, + { + "epoch": 0.8447399296050059, + "grad_norm": 51.28842760117986, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 837738884, + "step": 8640 + }, + { + "epoch": 0.8447399296050059, + "loss": 0.04700188711285591, + "loss_ce": 0.005757380276918411, + "loss_iou": 0.220703125, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 837738884, + "step": 8640 + }, + { + "epoch": 0.8448377004301917, + "grad_norm": 22.10427655102913, + "learning_rate": 5e-05, + "loss": 0.1034, + "num_input_tokens_seen": 837836012, + "step": 8641 + }, + { + "epoch": 0.8448377004301917, + "loss": 0.09556093066930771, + "loss_ce": 0.01192750409245491, + "loss_iou": 0.30078125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 837836012, + "step": 8641 + }, + { + "epoch": 0.8449354712553774, + "grad_norm": 9.29340330060795, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 837932972, + "step": 8642 + }, + { + "epoch": 0.8449354712553774, + "loss": 0.07199829816818237, + "loss_ce": 0.007547083310782909, + "loss_iou": 0.244140625, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 837932972, + "step": 8642 + }, + { + "epoch": 0.8450332420805632, + "grad_norm": 13.655542730915426, + "learning_rate": 5e-05, + "loss": 0.0599, + "num_input_tokens_seen": 838029784, + "step": 8643 + }, + { + "epoch": 0.8450332420805632, + "loss": 0.04924147203564644, + "loss_ce": 0.005082534626126289, + "loss_iou": 0.234375, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 838029784, + "step": 8643 + }, + { + "epoch": 0.8451310129057489, + "grad_norm": 13.892855425903049, + "learning_rate": 5e-05, + "loss": 0.0966, + "num_input_tokens_seen": 838127376, + "step": 8644 + }, + { + "epoch": 0.8451310129057489, + "loss": 0.09740649908781052, + "loss_ce": 0.006754026748239994, + "loss_iou": 0.31640625, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 838127376, + "step": 8644 + }, + { + "epoch": 0.8452287837309347, + "grad_norm": 2.816435364814616, + "learning_rate": 5e-05, + "loss": 0.09, + "num_input_tokens_seen": 838223896, + "step": 8645 + }, + { + "epoch": 0.8452287837309347, + "loss": 0.10760150849819183, + "loss_ce": 0.0036891591735184193, + "loss_iou": 0.11083984375, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 838223896, + "step": 8645 + }, + { + "epoch": 0.8453265545561205, + "grad_norm": 13.078943147626031, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 838321044, + "step": 8646 + }, + { + "epoch": 0.8453265545561205, + "loss": 0.07024024426937103, + "loss_ce": 0.009380567818880081, + "loss_iou": 0.3125, + "loss_num": 0.01220703125, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 838321044, + "step": 8646 + }, + { + "epoch": 0.8454243253813062, + "grad_norm": 16.85800336199227, + "learning_rate": 5e-05, + "loss": 0.0622, + "num_input_tokens_seen": 838417352, + "step": 8647 + }, + { + "epoch": 0.8454243253813062, + "loss": 0.06703050434589386, + "loss_ce": 0.005003525875508785, + "loss_iou": 0.3203125, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 838417352, + "step": 8647 + }, + { + "epoch": 0.845522096206492, + "grad_norm": 2.0382537416246804, + "learning_rate": 5e-05, + "loss": 0.0873, + "num_input_tokens_seen": 838515544, + "step": 8648 + }, + { + "epoch": 0.845522096206492, + "loss": 0.09745507687330246, + "loss_ce": 0.0076246801763772964, + "loss_iou": 0.19921875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 838515544, + "step": 8648 + }, + { + "epoch": 0.8456198670316778, + "grad_norm": 4.608366978982522, + "learning_rate": 5e-05, + "loss": 0.0585, + "num_input_tokens_seen": 838612204, + "step": 8649 + }, + { + "epoch": 0.8456198670316778, + "loss": 0.04838275909423828, + "loss_ce": 0.0033235521987080574, + "loss_iou": 0.298828125, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 838612204, + "step": 8649 + }, + { + "epoch": 0.8457176378568635, + "grad_norm": 2.256885463521776, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 838708992, + "step": 8650 + }, + { + "epoch": 0.8457176378568635, + "loss": 0.07292776554822922, + "loss_ce": 0.006277372594922781, + "loss_iou": 0.28125, + "loss_num": 0.01336669921875, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 838708992, + "step": 8650 + }, + { + "epoch": 0.8458154086820493, + "grad_norm": 13.357337033698444, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 838805892, + "step": 8651 + }, + { + "epoch": 0.8458154086820493, + "loss": 0.07068417966365814, + "loss_ce": 0.0032861109357327223, + "loss_iou": 0.35546875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 838805892, + "step": 8651 + }, + { + "epoch": 0.845913179507235, + "grad_norm": 18.11995122063749, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 838902460, + "step": 8652 + }, + { + "epoch": 0.845913179507235, + "loss": 0.07926564663648605, + "loss_ce": 0.0044060321524739265, + "loss_iou": 0.34375, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 838902460, + "step": 8652 + }, + { + "epoch": 0.8460109503324208, + "grad_norm": 3.597843633491288, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 838999092, + "step": 8653 + }, + { + "epoch": 0.8460109503324208, + "loss": 0.06390054523944855, + "loss_ce": 0.008015234023332596, + "loss_iou": 0.30078125, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 838999092, + "step": 8653 + }, + { + "epoch": 0.8461087211576066, + "grad_norm": 3.0221179836163414, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 839095688, + "step": 8654 + }, + { + "epoch": 0.8461087211576066, + "loss": 0.06414336711168289, + "loss_ce": 0.007922357879579067, + "loss_iou": 0.279296875, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 839095688, + "step": 8654 + }, + { + "epoch": 0.8462064919827923, + "grad_norm": 4.555055189804396, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 839192024, + "step": 8655 + }, + { + "epoch": 0.8462064919827923, + "loss": 0.08267028629779816, + "loss_ce": 0.0034161319490522146, + "loss_iou": 0.2578125, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 839192024, + "step": 8655 + }, + { + "epoch": 0.8463042628079781, + "grad_norm": 7.609241099471072, + "learning_rate": 5e-05, + "loss": 0.1127, + "num_input_tokens_seen": 839289464, + "step": 8656 + }, + { + "epoch": 0.8463042628079781, + "loss": 0.11669206619262695, + "loss_ce": 0.005615713074803352, + "loss_iou": 0.26171875, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 839289464, + "step": 8656 + }, + { + "epoch": 0.8464020336331639, + "grad_norm": 7.2776792565144985, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 839387752, + "step": 8657 + }, + { + "epoch": 0.8464020336331639, + "loss": 0.08763830363750458, + "loss_ce": 0.006324217654764652, + "loss_iou": 0.265625, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 839387752, + "step": 8657 + }, + { + "epoch": 0.8464998044583496, + "grad_norm": 5.746533471928675, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 839485048, + "step": 8658 + }, + { + "epoch": 0.8464998044583496, + "loss": 0.08554013073444366, + "loss_ce": 0.008529018610715866, + "loss_iou": 0.25390625, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 839485048, + "step": 8658 + }, + { + "epoch": 0.8465975752835354, + "grad_norm": 16.107579147670513, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 839581608, + "step": 8659 + }, + { + "epoch": 0.8465975752835354, + "loss": 0.06451106071472168, + "loss_ce": 0.005764723289757967, + "loss_iou": 0.3203125, + "loss_num": 0.01171875, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 839581608, + "step": 8659 + }, + { + "epoch": 0.8466953461087211, + "grad_norm": 5.333677605736126, + "learning_rate": 5e-05, + "loss": 0.0745, + "num_input_tokens_seen": 839678124, + "step": 8660 + }, + { + "epoch": 0.8466953461087211, + "loss": 0.05462057888507843, + "loss_ce": 0.004373385105282068, + "loss_iou": 0.26953125, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 839678124, + "step": 8660 + }, + { + "epoch": 0.8467931169339069, + "grad_norm": 6.602292596644676, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 839774936, + "step": 8661 + }, + { + "epoch": 0.8467931169339069, + "loss": 0.08251036703586578, + "loss_ce": 0.006582626607269049, + "loss_iou": 0.349609375, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 839774936, + "step": 8661 + }, + { + "epoch": 0.8468908877590927, + "grad_norm": 25.922732633177514, + "learning_rate": 5e-05, + "loss": 0.1154, + "num_input_tokens_seen": 839872496, + "step": 8662 + }, + { + "epoch": 0.8468908877590927, + "loss": 0.13157975673675537, + "loss_ce": 0.0058015501126646996, + "loss_iou": 0.33203125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 839872496, + "step": 8662 + }, + { + "epoch": 0.8469886585842784, + "grad_norm": 4.3979362498163574, + "learning_rate": 5e-05, + "loss": 0.0549, + "num_input_tokens_seen": 839970032, + "step": 8663 + }, + { + "epoch": 0.8469886585842784, + "loss": 0.03949473053216934, + "loss_ce": 0.003556466195732355, + "loss_iou": 0.27734375, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 839970032, + "step": 8663 + }, + { + "epoch": 0.8470864294094642, + "grad_norm": 32.46328487408294, + "learning_rate": 5e-05, + "loss": 0.0682, + "num_input_tokens_seen": 840066304, + "step": 8664 + }, + { + "epoch": 0.8470864294094642, + "loss": 0.03842449188232422, + "loss_ce": 0.004977226257324219, + "loss_iou": 0.302734375, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 840066304, + "step": 8664 + }, + { + "epoch": 0.84718420023465, + "grad_norm": 11.155887598183869, + "learning_rate": 5e-05, + "loss": 0.0959, + "num_input_tokens_seen": 840163332, + "step": 8665 + }, + { + "epoch": 0.84718420023465, + "loss": 0.08793622255325317, + "loss_ce": 0.0064390250481665134, + "loss_iou": 0.3125, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 840163332, + "step": 8665 + }, + { + "epoch": 0.8472819710598357, + "grad_norm": 3.7143293890463642, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 840259844, + "step": 8666 + }, + { + "epoch": 0.8472819710598357, + "loss": 0.04758452624082565, + "loss_ce": 0.003921502269804478, + "loss_iou": 0.35546875, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 840259844, + "step": 8666 + }, + { + "epoch": 0.8473797418850215, + "grad_norm": 6.550152710775746, + "learning_rate": 5e-05, + "loss": 0.088, + "num_input_tokens_seen": 840356820, + "step": 8667 + }, + { + "epoch": 0.8473797418850215, + "loss": 0.07767175883054733, + "loss_ce": 0.007248633541166782, + "loss_iou": 0.283203125, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 840356820, + "step": 8667 + }, + { + "epoch": 0.8474775127102073, + "grad_norm": 8.359042752090849, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 840453940, + "step": 8668 + }, + { + "epoch": 0.8474775127102073, + "loss": 0.048075418919324875, + "loss_ce": 0.0034739794209599495, + "loss_iou": 0.283203125, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 840453940, + "step": 8668 + }, + { + "epoch": 0.847575283535393, + "grad_norm": 5.1850754556305185, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 840550432, + "step": 8669 + }, + { + "epoch": 0.847575283535393, + "loss": 0.06649637222290039, + "loss_ce": 0.006582741625607014, + "loss_iou": 0.28125, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 840550432, + "step": 8669 + }, + { + "epoch": 0.8476730543605788, + "grad_norm": 7.795735193769483, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 840647744, + "step": 8670 + }, + { + "epoch": 0.8476730543605788, + "loss": 0.08638398349285126, + "loss_ce": 0.007465524598956108, + "loss_iou": 0.306640625, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 840647744, + "step": 8670 + }, + { + "epoch": 0.8477708251857645, + "grad_norm": 12.805060726255109, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 840743616, + "step": 8671 + }, + { + "epoch": 0.8477708251857645, + "loss": 0.04121149331331253, + "loss_ce": 0.00210703001357615, + "loss_iou": 0.216796875, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 840743616, + "step": 8671 + }, + { + "epoch": 0.8478685960109503, + "grad_norm": 7.785883049263782, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 840839444, + "step": 8672 + }, + { + "epoch": 0.8478685960109503, + "loss": 0.08137752115726471, + "loss_ce": 0.009691725485026836, + "loss_iou": 0.27734375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 840839444, + "step": 8672 + }, + { + "epoch": 0.8479663668361361, + "grad_norm": 9.65742323531938, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 840937392, + "step": 8673 + }, + { + "epoch": 0.8479663668361361, + "loss": 0.07886989414691925, + "loss_ce": 0.0034304396249353886, + "loss_iou": 0.359375, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 840937392, + "step": 8673 + }, + { + "epoch": 0.8480641376613218, + "grad_norm": 17.774974508687503, + "learning_rate": 5e-05, + "loss": 0.1094, + "num_input_tokens_seen": 841035296, + "step": 8674 + }, + { + "epoch": 0.8480641376613218, + "loss": 0.087206169962883, + "loss_ce": 0.0075400290079414845, + "loss_iou": 0.3515625, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 841035296, + "step": 8674 + }, + { + "epoch": 0.8481619084865076, + "grad_norm": 6.997849071129032, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 841131664, + "step": 8675 + }, + { + "epoch": 0.8481619084865076, + "loss": 0.08038143813610077, + "loss_ce": 0.01068691723048687, + "loss_iou": 0.259765625, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 841131664, + "step": 8675 + }, + { + "epoch": 0.8482596793116934, + "grad_norm": 6.3486574282210455, + "learning_rate": 5e-05, + "loss": 0.116, + "num_input_tokens_seen": 841228708, + "step": 8676 + }, + { + "epoch": 0.8482596793116934, + "loss": 0.15531834959983826, + "loss_ce": 0.007765863090753555, + "loss_iou": 0.310546875, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 841228708, + "step": 8676 + }, + { + "epoch": 0.8483574501368791, + "grad_norm": 3.3489261611072743, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 841324076, + "step": 8677 + }, + { + "epoch": 0.8483574501368791, + "loss": 0.05040953308343887, + "loss_ce": 0.006677842698991299, + "loss_iou": 0.24609375, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 841324076, + "step": 8677 + }, + { + "epoch": 0.8484552209620649, + "grad_norm": 19.982041924477528, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 841420780, + "step": 8678 + }, + { + "epoch": 0.8484552209620649, + "loss": 0.10116711258888245, + "loss_ce": 0.0040296586230397224, + "loss_iou": 0.333984375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 841420780, + "step": 8678 + }, + { + "epoch": 0.8485529917872506, + "grad_norm": 7.782966790852033, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 841517428, + "step": 8679 + }, + { + "epoch": 0.8485529917872506, + "loss": 0.11555463075637817, + "loss_ce": 0.0079496493563056, + "loss_iou": 0.244140625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 841517428, + "step": 8679 + }, + { + "epoch": 0.8486507626124364, + "grad_norm": 5.540457201386006, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 841612556, + "step": 8680 + }, + { + "epoch": 0.8486507626124364, + "loss": 0.07736781984567642, + "loss_ce": 0.006292385049164295, + "loss_iou": 0.185546875, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 841612556, + "step": 8680 + }, + { + "epoch": 0.8487485334376222, + "grad_norm": 10.998796971463527, + "learning_rate": 5e-05, + "loss": 0.0658, + "num_input_tokens_seen": 841708764, + "step": 8681 + }, + { + "epoch": 0.8487485334376222, + "loss": 0.0726609155535698, + "loss_ce": 0.002539148321375251, + "loss_iou": 0.2890625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 841708764, + "step": 8681 + }, + { + "epoch": 0.8488463042628079, + "grad_norm": 10.262686834745814, + "learning_rate": 5e-05, + "loss": 0.0563, + "num_input_tokens_seen": 841805236, + "step": 8682 + }, + { + "epoch": 0.8488463042628079, + "loss": 0.03813396394252777, + "loss_ce": 0.0034583653323352337, + "loss_iou": 0.169921875, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 841805236, + "step": 8682 + }, + { + "epoch": 0.8489440750879937, + "grad_norm": 19.051687375369706, + "learning_rate": 5e-05, + "loss": 0.067, + "num_input_tokens_seen": 841903008, + "step": 8683 + }, + { + "epoch": 0.8489440750879937, + "loss": 0.07590467482805252, + "loss_ce": 0.00821668840944767, + "loss_iou": 0.25, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 841903008, + "step": 8683 + }, + { + "epoch": 0.8490418459131795, + "grad_norm": 4.218715079205367, + "learning_rate": 5e-05, + "loss": 0.0536, + "num_input_tokens_seen": 841999980, + "step": 8684 + }, + { + "epoch": 0.8490418459131795, + "loss": 0.07585646957159042, + "loss_ce": 0.006016986910253763, + "loss_iou": 0.26953125, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 841999980, + "step": 8684 + }, + { + "epoch": 0.8491396167383652, + "grad_norm": 19.39136290431757, + "learning_rate": 5e-05, + "loss": 0.0935, + "num_input_tokens_seen": 842097792, + "step": 8685 + }, + { + "epoch": 0.8491396167383652, + "loss": 0.04451959207653999, + "loss_ce": 0.001444028690457344, + "loss_iou": 0.33203125, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 842097792, + "step": 8685 + }, + { + "epoch": 0.849237387563551, + "grad_norm": 5.362442249415972, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 842195992, + "step": 8686 + }, + { + "epoch": 0.849237387563551, + "loss": 0.06396061927080154, + "loss_ce": 0.004680223297327757, + "loss_iou": 0.40234375, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 842195992, + "step": 8686 + }, + { + "epoch": 0.8493351583887369, + "grad_norm": 4.669291664249778, + "learning_rate": 5e-05, + "loss": 0.1055, + "num_input_tokens_seen": 842293428, + "step": 8687 + }, + { + "epoch": 0.8493351583887369, + "loss": 0.09687957167625427, + "loss_ce": 0.007188403978943825, + "loss_iou": 0.34375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 842293428, + "step": 8687 + }, + { + "epoch": 0.8494329292139225, + "grad_norm": 11.819247849798412, + "learning_rate": 5e-05, + "loss": 0.0709, + "num_input_tokens_seen": 842389452, + "step": 8688 + }, + { + "epoch": 0.8494329292139225, + "loss": 0.051486700773239136, + "loss_ce": 0.0046879928559064865, + "loss_iou": 0.291015625, + "loss_num": 0.00933837890625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 842389452, + "step": 8688 + }, + { + "epoch": 0.8495307000391084, + "grad_norm": 9.301893319669487, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 842486060, + "step": 8689 + }, + { + "epoch": 0.8495307000391084, + "loss": 0.08018346130847931, + "loss_ce": 0.004644821397960186, + "loss_iou": 0.267578125, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 842486060, + "step": 8689 + }, + { + "epoch": 0.849628470864294, + "grad_norm": 2.199297937588856, + "learning_rate": 5e-05, + "loss": 0.0561, + "num_input_tokens_seen": 842582768, + "step": 8690 + }, + { + "epoch": 0.849628470864294, + "loss": 0.0414268784224987, + "loss_ce": 0.005961637478321791, + "loss_iou": 0.19921875, + "loss_num": 0.007110595703125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 842582768, + "step": 8690 + }, + { + "epoch": 0.8497262416894799, + "grad_norm": 10.584930518598734, + "learning_rate": 5e-05, + "loss": 0.0552, + "num_input_tokens_seen": 842680004, + "step": 8691 + }, + { + "epoch": 0.8497262416894799, + "loss": 0.03614109382033348, + "loss_ce": 0.005707437638193369, + "loss_iou": 0.267578125, + "loss_num": 0.006072998046875, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 842680004, + "step": 8691 + }, + { + "epoch": 0.8498240125146657, + "grad_norm": 5.8286883201460515, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 842776972, + "step": 8692 + }, + { + "epoch": 0.8498240125146657, + "loss": 0.07829460501670837, + "loss_ce": 0.0071581327356398106, + "loss_iou": 0.2578125, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 842776972, + "step": 8692 + }, + { + "epoch": 0.8499217833398514, + "grad_norm": 3.480571734124352, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 842872656, + "step": 8693 + }, + { + "epoch": 0.8499217833398514, + "loss": 0.03239068016409874, + "loss_ce": 0.003666010219603777, + "loss_iou": 0.197265625, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 842872656, + "step": 8693 + }, + { + "epoch": 0.8500195541650372, + "grad_norm": 2.5434026123169833, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 842968516, + "step": 8694 + }, + { + "epoch": 0.8500195541650372, + "loss": 0.06474627554416656, + "loss_ce": 0.00509203877300024, + "loss_iou": 0.234375, + "loss_num": 0.011962890625, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 842968516, + "step": 8694 + }, + { + "epoch": 0.850117324990223, + "grad_norm": 3.2212203560607953, + "learning_rate": 5e-05, + "loss": 0.0584, + "num_input_tokens_seen": 843065272, + "step": 8695 + }, + { + "epoch": 0.850117324990223, + "loss": 0.07287369668483734, + "loss_ce": 0.006250965408980846, + "loss_iou": 0.23046875, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 843065272, + "step": 8695 + }, + { + "epoch": 0.8502150958154087, + "grad_norm": 2.9275075033186386, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 843163064, + "step": 8696 + }, + { + "epoch": 0.8502150958154087, + "loss": 0.054720696061849594, + "loss_ce": 0.004771050065755844, + "loss_iou": 0.2578125, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 843163064, + "step": 8696 + }, + { + "epoch": 0.8503128666405945, + "grad_norm": 21.960862981775595, + "learning_rate": 5e-05, + "loss": 0.1169, + "num_input_tokens_seen": 843260464, + "step": 8697 + }, + { + "epoch": 0.8503128666405945, + "loss": 0.12268555909395218, + "loss_ce": 0.010396130383014679, + "loss_iou": 0.349609375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 843260464, + "step": 8697 + }, + { + "epoch": 0.8504106374657802, + "grad_norm": 28.161962017852005, + "learning_rate": 5e-05, + "loss": 0.0863, + "num_input_tokens_seen": 843357692, + "step": 8698 + }, + { + "epoch": 0.8504106374657802, + "loss": 0.08074192702770233, + "loss_ce": 0.007225087843835354, + "loss_iou": 0.427734375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 843357692, + "step": 8698 + }, + { + "epoch": 0.850508408290966, + "grad_norm": 13.186530677195902, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 843454800, + "step": 8699 + }, + { + "epoch": 0.850508408290966, + "loss": 0.07903044670820236, + "loss_ce": 0.006272720638662577, + "loss_iou": 0.318359375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 843454800, + "step": 8699 + }, + { + "epoch": 0.8506061791161518, + "grad_norm": 8.041426297708709, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 843550760, + "step": 8700 + }, + { + "epoch": 0.8506061791161518, + "loss": 0.04757041484117508, + "loss_ce": 0.005532451905310154, + "loss_iou": 0.1572265625, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 843550760, + "step": 8700 + }, + { + "epoch": 0.8507039499413375, + "grad_norm": 4.256939457259129, + "learning_rate": 5e-05, + "loss": 0.0537, + "num_input_tokens_seen": 843646676, + "step": 8701 + }, + { + "epoch": 0.8507039499413375, + "loss": 0.06737541407346725, + "loss_ce": 0.006157153286039829, + "loss_iou": 0.306640625, + "loss_num": 0.01220703125, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 843646676, + "step": 8701 + }, + { + "epoch": 0.8508017207665233, + "grad_norm": 3.9324705862151705, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 843743540, + "step": 8702 + }, + { + "epoch": 0.8508017207665233, + "loss": 0.067379429936409, + "loss_ce": 0.003673986066132784, + "loss_iou": 0.224609375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 843743540, + "step": 8702 + }, + { + "epoch": 0.8508994915917091, + "grad_norm": 6.224483756161503, + "learning_rate": 5e-05, + "loss": 0.1181, + "num_input_tokens_seen": 843840344, + "step": 8703 + }, + { + "epoch": 0.8508994915917091, + "loss": 0.11764504015445709, + "loss_ce": 0.0035398148465901613, + "loss_iou": 0.353515625, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 843840344, + "step": 8703 + }, + { + "epoch": 0.8509972624168948, + "grad_norm": 3.77755462955019, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 843937268, + "step": 8704 + }, + { + "epoch": 0.8509972624168948, + "loss": 0.07508246600627899, + "loss_ce": 0.0013825177447870374, + "loss_iou": 0.318359375, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 843937268, + "step": 8704 + }, + { + "epoch": 0.8510950332420806, + "grad_norm": 5.02548498085976, + "learning_rate": 5e-05, + "loss": 0.0889, + "num_input_tokens_seen": 844034216, + "step": 8705 + }, + { + "epoch": 0.8510950332420806, + "loss": 0.09426014125347137, + "loss_ce": 0.0043553635478019714, + "loss_iou": 0.298828125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 844034216, + "step": 8705 + }, + { + "epoch": 0.8511928040672663, + "grad_norm": 5.857218416352319, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 844131308, + "step": 8706 + }, + { + "epoch": 0.8511928040672663, + "loss": 0.06450692564249039, + "loss_ce": 0.006340420339256525, + "loss_iou": 0.298828125, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 844131308, + "step": 8706 + }, + { + "epoch": 0.8512905748924521, + "grad_norm": 5.765670560936219, + "learning_rate": 5e-05, + "loss": 0.0787, + "num_input_tokens_seen": 844228568, + "step": 8707 + }, + { + "epoch": 0.8512905748924521, + "loss": 0.06645374000072479, + "loss_ce": 0.003938484936952591, + "loss_iou": 0.38671875, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 844228568, + "step": 8707 + }, + { + "epoch": 0.8513883457176379, + "grad_norm": 5.055292950707559, + "learning_rate": 5e-05, + "loss": 0.0934, + "num_input_tokens_seen": 844325100, + "step": 8708 + }, + { + "epoch": 0.8513883457176379, + "loss": 0.0746956467628479, + "loss_ce": 0.007801113650202751, + "loss_iou": 0.296875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 844325100, + "step": 8708 + }, + { + "epoch": 0.8514861165428236, + "grad_norm": 8.577307118937528, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 844421352, + "step": 8709 + }, + { + "epoch": 0.8514861165428236, + "loss": 0.09629915654659271, + "loss_ce": 0.010682092979550362, + "loss_iou": 0.3984375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 844421352, + "step": 8709 + }, + { + "epoch": 0.8515838873680094, + "grad_norm": 18.91852489392514, + "learning_rate": 5e-05, + "loss": 0.0931, + "num_input_tokens_seen": 844518800, + "step": 8710 + }, + { + "epoch": 0.8515838873680094, + "loss": 0.13447539508342743, + "loss_ce": 0.007400200702250004, + "loss_iou": 0.173828125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 844518800, + "step": 8710 + }, + { + "epoch": 0.8516816581931952, + "grad_norm": 25.99807662552245, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 844614928, + "step": 8711 + }, + { + "epoch": 0.8516816581931952, + "loss": 0.07146209478378296, + "loss_ce": 0.0073828077875077724, + "loss_iou": 0.255859375, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 844614928, + "step": 8711 + }, + { + "epoch": 0.8517794290183809, + "grad_norm": 15.060260194858136, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 844711680, + "step": 8712 + }, + { + "epoch": 0.8517794290183809, + "loss": 0.04405956342816353, + "loss_ce": 0.0023725535720586777, + "loss_iou": 0.248046875, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 844711680, + "step": 8712 + }, + { + "epoch": 0.8518771998435667, + "grad_norm": 8.86588224555383, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 844809168, + "step": 8713 + }, + { + "epoch": 0.8518771998435667, + "loss": 0.07972671091556549, + "loss_ce": 0.006713402923196554, + "loss_iou": 0.3203125, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 844809168, + "step": 8713 + }, + { + "epoch": 0.8519749706687525, + "grad_norm": 6.00885606471126, + "learning_rate": 5e-05, + "loss": 0.077, + "num_input_tokens_seen": 844905628, + "step": 8714 + }, + { + "epoch": 0.8519749706687525, + "loss": 0.07944122701883316, + "loss_ce": 0.008037719875574112, + "loss_iou": 0.25390625, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 844905628, + "step": 8714 + }, + { + "epoch": 0.8520727414939382, + "grad_norm": 16.7666530858758, + "learning_rate": 5e-05, + "loss": 0.0769, + "num_input_tokens_seen": 845002220, + "step": 8715 + }, + { + "epoch": 0.8520727414939382, + "loss": 0.08454374969005585, + "loss_ce": 0.0063577136024832726, + "loss_iou": 0.2001953125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 845002220, + "step": 8715 + }, + { + "epoch": 0.852170512319124, + "grad_norm": 11.228778980302378, + "learning_rate": 5e-05, + "loss": 0.0908, + "num_input_tokens_seen": 845098740, + "step": 8716 + }, + { + "epoch": 0.852170512319124, + "loss": 0.12701448798179626, + "loss_ce": 0.00918612815439701, + "loss_iou": 0.21875, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 845098740, + "step": 8716 + }, + { + "epoch": 0.8522682831443097, + "grad_norm": 7.89754064676584, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 845196256, + "step": 8717 + }, + { + "epoch": 0.8522682831443097, + "loss": 0.10916589200496674, + "loss_ce": 0.005421380512416363, + "loss_iou": 0.2890625, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 845196256, + "step": 8717 + }, + { + "epoch": 0.8523660539694955, + "grad_norm": 9.700491435703688, + "learning_rate": 5e-05, + "loss": 0.104, + "num_input_tokens_seen": 845293656, + "step": 8718 + }, + { + "epoch": 0.8523660539694955, + "loss": 0.10277030616998672, + "loss_ce": 0.003366925287991762, + "loss_iou": 0.35546875, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 845293656, + "step": 8718 + }, + { + "epoch": 0.8524638247946813, + "grad_norm": 6.757337153148635, + "learning_rate": 5e-05, + "loss": 0.051, + "num_input_tokens_seen": 845390732, + "step": 8719 + }, + { + "epoch": 0.8524638247946813, + "loss": 0.05634346231818199, + "loss_ce": 0.003380207112058997, + "loss_iou": 0.26953125, + "loss_num": 0.01055908203125, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 845390732, + "step": 8719 + }, + { + "epoch": 0.852561595619867, + "grad_norm": 17.088195797158342, + "learning_rate": 5e-05, + "loss": 0.1017, + "num_input_tokens_seen": 845487396, + "step": 8720 + }, + { + "epoch": 0.852561595619867, + "loss": 0.10198862850666046, + "loss_ce": 0.011420097202062607, + "loss_iou": 0.26953125, + "loss_num": 0.0181884765625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 845487396, + "step": 8720 + }, + { + "epoch": 0.8526593664450528, + "grad_norm": 24.92862512732192, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 845584564, + "step": 8721 + }, + { + "epoch": 0.8526593664450528, + "loss": 0.07184307277202606, + "loss_ce": 0.008900566026568413, + "loss_iou": 0.267578125, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 845584564, + "step": 8721 + }, + { + "epoch": 0.8527571372702386, + "grad_norm": 4.890371835484729, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 845681360, + "step": 8722 + }, + { + "epoch": 0.8527571372702386, + "loss": 0.09437872469425201, + "loss_ce": 0.003146418835967779, + "loss_iou": 0.298828125, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 845681360, + "step": 8722 + }, + { + "epoch": 0.8528549080954243, + "grad_norm": 10.950620141553394, + "learning_rate": 5e-05, + "loss": 0.0633, + "num_input_tokens_seen": 845779772, + "step": 8723 + }, + { + "epoch": 0.8528549080954243, + "loss": 0.06916245818138123, + "loss_ce": 0.002817244501784444, + "loss_iou": 0.326171875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 845779772, + "step": 8723 + }, + { + "epoch": 0.8529526789206101, + "grad_norm": 4.884054383677946, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 845877308, + "step": 8724 + }, + { + "epoch": 0.8529526789206101, + "loss": 0.08365141600370407, + "loss_ce": 0.00935637578368187, + "loss_iou": 0.3203125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 845877308, + "step": 8724 + }, + { + "epoch": 0.8530504497457958, + "grad_norm": 22.293236883309184, + "learning_rate": 5e-05, + "loss": 0.1171, + "num_input_tokens_seen": 845974456, + "step": 8725 + }, + { + "epoch": 0.8530504497457958, + "loss": 0.1410418450832367, + "loss_ce": 0.007420624606311321, + "loss_iou": 0.1796875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 845974456, + "step": 8725 + }, + { + "epoch": 0.8531482205709816, + "grad_norm": 13.597556805481448, + "learning_rate": 5e-05, + "loss": 0.118, + "num_input_tokens_seen": 846071840, + "step": 8726 + }, + { + "epoch": 0.8531482205709816, + "loss": 0.13269111514091492, + "loss_ce": 0.005356517154723406, + "loss_iou": 0.416015625, + "loss_num": 0.0255126953125, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 846071840, + "step": 8726 + }, + { + "epoch": 0.8532459913961674, + "grad_norm": 6.36382489702861, + "learning_rate": 5e-05, + "loss": 0.0913, + "num_input_tokens_seen": 846168156, + "step": 8727 + }, + { + "epoch": 0.8532459913961674, + "loss": 0.08663053810596466, + "loss_ce": 0.010672280564904213, + "loss_iou": 0.2314453125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 846168156, + "step": 8727 + }, + { + "epoch": 0.8533437622213531, + "grad_norm": 6.70791794989441, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 846265448, + "step": 8728 + }, + { + "epoch": 0.8533437622213531, + "loss": 0.06687181442975998, + "loss_ce": 0.012119464576244354, + "loss_iou": 0.26171875, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 846265448, + "step": 8728 + }, + { + "epoch": 0.8534415330465389, + "grad_norm": 3.027128370494202, + "learning_rate": 5e-05, + "loss": 0.0496, + "num_input_tokens_seen": 846361284, + "step": 8729 + }, + { + "epoch": 0.8534415330465389, + "loss": 0.04676125943660736, + "loss_ce": 0.003777250647544861, + "loss_iou": 0.236328125, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 846361284, + "step": 8729 + }, + { + "epoch": 0.8535393038717247, + "grad_norm": 14.20540683555645, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 846458828, + "step": 8730 + }, + { + "epoch": 0.8535393038717247, + "loss": 0.05014701932668686, + "loss_ce": 0.004782639443874359, + "loss_iou": 0.259765625, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 846458828, + "step": 8730 + }, + { + "epoch": 0.8536370746969104, + "grad_norm": 15.762395880299762, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 846555676, + "step": 8731 + }, + { + "epoch": 0.8536370746969104, + "loss": 0.05883980169892311, + "loss_ce": 0.004838945809751749, + "loss_iou": 0.294921875, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 846555676, + "step": 8731 + }, + { + "epoch": 0.8537348455220962, + "grad_norm": 7.058110819783385, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 846651868, + "step": 8732 + }, + { + "epoch": 0.8537348455220962, + "loss": 0.062161002308130264, + "loss_ce": 0.005306757986545563, + "loss_iou": 0.263671875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 846651868, + "step": 8732 + }, + { + "epoch": 0.853832616347282, + "grad_norm": 20.883392686198686, + "learning_rate": 5e-05, + "loss": 0.0977, + "num_input_tokens_seen": 846749348, + "step": 8733 + }, + { + "epoch": 0.853832616347282, + "loss": 0.07279521971940994, + "loss_ce": 0.005549736320972443, + "loss_iou": 0.23046875, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 846749348, + "step": 8733 + }, + { + "epoch": 0.8539303871724677, + "grad_norm": 14.110502260894044, + "learning_rate": 5e-05, + "loss": 0.0556, + "num_input_tokens_seen": 846846528, + "step": 8734 + }, + { + "epoch": 0.8539303871724677, + "loss": 0.06418120115995407, + "loss_ce": 0.00630461610853672, + "loss_iou": 0.271484375, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 846846528, + "step": 8734 + }, + { + "epoch": 0.8540281579976535, + "grad_norm": 12.9422050199893, + "learning_rate": 5e-05, + "loss": 0.0834, + "num_input_tokens_seen": 846942012, + "step": 8735 + }, + { + "epoch": 0.8540281579976535, + "loss": 0.0871235579252243, + "loss_ce": 0.004199670162051916, + "loss_iou": 0.23046875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 846942012, + "step": 8735 + }, + { + "epoch": 0.8541259288228392, + "grad_norm": 3.181713767353761, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 847038708, + "step": 8736 + }, + { + "epoch": 0.8541259288228392, + "loss": 0.07180628180503845, + "loss_ce": 0.008589118719100952, + "loss_iou": 0.216796875, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 847038708, + "step": 8736 + }, + { + "epoch": 0.854223699648025, + "grad_norm": 2.2584766985252247, + "learning_rate": 5e-05, + "loss": 0.0356, + "num_input_tokens_seen": 847135368, + "step": 8737 + }, + { + "epoch": 0.854223699648025, + "loss": 0.0410940907895565, + "loss_ce": 0.005144383292645216, + "loss_iou": 0.25390625, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 847135368, + "step": 8737 + }, + { + "epoch": 0.8543214704732108, + "grad_norm": 3.878871168535389, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 847231812, + "step": 8738 + }, + { + "epoch": 0.8543214704732108, + "loss": 0.061701998114585876, + "loss_ce": 0.004138214513659477, + "loss_iou": 0.2099609375, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 847231812, + "step": 8738 + }, + { + "epoch": 0.8544192412983965, + "grad_norm": 20.734578770247623, + "learning_rate": 5e-05, + "loss": 0.1032, + "num_input_tokens_seen": 847329204, + "step": 8739 + }, + { + "epoch": 0.8544192412983965, + "loss": 0.09625425934791565, + "loss_ce": 0.004686263855546713, + "loss_iou": 0.30078125, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 847329204, + "step": 8739 + }, + { + "epoch": 0.8545170121235823, + "grad_norm": 6.748327674079823, + "learning_rate": 5e-05, + "loss": 0.0599, + "num_input_tokens_seen": 847425664, + "step": 8740 + }, + { + "epoch": 0.8545170121235823, + "loss": 0.04082125052809715, + "loss_ce": 0.0013315046671777964, + "loss_iou": 0.263671875, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 847425664, + "step": 8740 + }, + { + "epoch": 0.8546147829487681, + "grad_norm": 4.7998177268839, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 847522940, + "step": 8741 + }, + { + "epoch": 0.8546147829487681, + "loss": 0.10337699949741364, + "loss_ce": 0.003218304365873337, + "loss_iou": 0.28515625, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 847522940, + "step": 8741 + }, + { + "epoch": 0.8547125537739538, + "grad_norm": 20.860779239305398, + "learning_rate": 5e-05, + "loss": 0.0697, + "num_input_tokens_seen": 847620520, + "step": 8742 + }, + { + "epoch": 0.8547125537739538, + "loss": 0.07153981924057007, + "loss_ce": 0.0032033389434218407, + "loss_iou": 0.427734375, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 847620520, + "step": 8742 + }, + { + "epoch": 0.8548103245991396, + "grad_norm": 31.78724325430947, + "learning_rate": 5e-05, + "loss": 0.0563, + "num_input_tokens_seen": 847716432, + "step": 8743 + }, + { + "epoch": 0.8548103245991396, + "loss": 0.03777199238538742, + "loss_ce": 0.004813011270016432, + "loss_iou": 0.28515625, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 847716432, + "step": 8743 + }, + { + "epoch": 0.8549080954243253, + "grad_norm": 17.86922482152694, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 847813956, + "step": 8744 + }, + { + "epoch": 0.8549080954243253, + "loss": 0.08986321836709976, + "loss_ce": 0.005855956114828587, + "loss_iou": 0.283203125, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 847813956, + "step": 8744 + }, + { + "epoch": 0.8550058662495111, + "grad_norm": 6.7876505934211515, + "learning_rate": 5e-05, + "loss": 0.1058, + "num_input_tokens_seen": 847911764, + "step": 8745 + }, + { + "epoch": 0.8550058662495111, + "loss": 0.14648272097110748, + "loss_ce": 0.008482223376631737, + "loss_iou": 0.291015625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 847911764, + "step": 8745 + }, + { + "epoch": 0.8551036370746969, + "grad_norm": 13.864049252134413, + "learning_rate": 5e-05, + "loss": 0.0856, + "num_input_tokens_seen": 848009192, + "step": 8746 + }, + { + "epoch": 0.8551036370746969, + "loss": 0.10983750224113464, + "loss_ce": 0.0031175375916063786, + "loss_iou": 0.236328125, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 848009192, + "step": 8746 + }, + { + "epoch": 0.8552014078998826, + "grad_norm": 41.157776364760814, + "learning_rate": 5e-05, + "loss": 0.1209, + "num_input_tokens_seen": 848105972, + "step": 8747 + }, + { + "epoch": 0.8552014078998826, + "loss": 0.12913168966770172, + "loss_ce": 0.004024875815957785, + "loss_iou": 0.28125, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 848105972, + "step": 8747 + }, + { + "epoch": 0.8552991787250684, + "grad_norm": 14.12776096611228, + "learning_rate": 5e-05, + "loss": 0.0947, + "num_input_tokens_seen": 848202456, + "step": 8748 + }, + { + "epoch": 0.8552991787250684, + "loss": 0.11304296553134918, + "loss_ce": 0.006912374868988991, + "loss_iou": 0.296875, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 848202456, + "step": 8748 + }, + { + "epoch": 0.8553969495502542, + "grad_norm": 8.553047364951027, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 848298044, + "step": 8749 + }, + { + "epoch": 0.8553969495502542, + "loss": 0.049076832830905914, + "loss_ce": 0.003559863194823265, + "loss_iou": 0.267578125, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 848298044, + "step": 8749 + }, + { + "epoch": 0.8554947203754399, + "grad_norm": 3.0037790373605295, + "learning_rate": 5e-05, + "loss": 0.058, + "num_input_tokens_seen": 848395560, + "step": 8750 + }, + { + "epoch": 0.8554947203754399, + "eval_seeclick_CIoU": 0.5556900501251221, + "eval_seeclick_GIoU": 0.556911289691925, + "eval_seeclick_IoU": 0.594916969537735, + "eval_seeclick_MAE_all": 0.0676566306501627, + "eval_seeclick_MAE_h": 0.04215809144079685, + "eval_seeclick_MAE_w": 0.09245654568076134, + "eval_seeclick_MAE_x": 0.0935608334839344, + "eval_seeclick_MAE_y": 0.04245105944573879, + "eval_seeclick_NUM_probability": 0.999999076128006, + "eval_seeclick_inside_bbox": 0.8309659063816071, + "eval_seeclick_loss": 0.2429952770471573, + "eval_seeclick_loss_ce": 0.010103475768119097, + "eval_seeclick_loss_iou": 0.41961669921875, + "eval_seeclick_loss_num": 0.047847747802734375, + "eval_seeclick_loss_xval": 0.23944091796875, + "eval_seeclick_runtime": 76.4346, + "eval_seeclick_samples_per_second": 0.563, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 848395560, + "step": 8750 + }, + { + "epoch": 0.8554947203754399, + "eval_icons_CIoU": 0.6587079465389252, + "eval_icons_GIoU": 0.6618852019309998, + "eval_icons_IoU": 0.6963165402412415, + "eval_icons_MAE_all": 0.068962212651968, + "eval_icons_MAE_h": 0.08233117125928402, + "eval_icons_MAE_w": 0.05575951747596264, + "eval_icons_MAE_x": 0.05634081922471523, + "eval_icons_MAE_y": 0.08141734078526497, + "eval_icons_NUM_probability": 0.9999993145465851, + "eval_icons_inside_bbox": 0.8038194477558136, + "eval_icons_loss": 0.20967836678028107, + "eval_icons_loss_ce": 2.9725801596214296e-05, + "eval_icons_loss_iou": 0.31829833984375, + "eval_icons_loss_num": 0.048816680908203125, + "eval_icons_loss_xval": 0.244171142578125, + "eval_icons_runtime": 86.9838, + "eval_icons_samples_per_second": 0.575, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 848395560, + "step": 8750 + }, + { + "epoch": 0.8554947203754399, + "eval_screenspot_CIoU": 0.26860573266943294, + "eval_screenspot_GIoU": 0.2391352200259765, + "eval_screenspot_IoU": 0.3702520728111267, + "eval_screenspot_MAE_all": 0.1810564324259758, + "eval_screenspot_MAE_h": 0.12892194092273712, + "eval_screenspot_MAE_w": 0.2318971330920855, + "eval_screenspot_MAE_x": 0.23717334121465683, + "eval_screenspot_MAE_y": 0.1262333020567894, + "eval_screenspot_NUM_probability": 0.9999983509381613, + "eval_screenspot_inside_bbox": 0.5795833269755045, + "eval_screenspot_loss": 0.6323378682136536, + "eval_screenspot_loss_ce": 0.02434539111951987, + "eval_screenspot_loss_iou": 0.3144124348958333, + "eval_screenspot_loss_num": 0.12303670247395833, + "eval_screenspot_loss_xval": 0.6149495442708334, + "eval_screenspot_runtime": 147.864, + "eval_screenspot_samples_per_second": 0.602, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 848395560, + "step": 8750 + }, + { + "epoch": 0.8554947203754399, + "eval_compot_CIoU": 0.5278950929641724, + "eval_compot_GIoU": 0.5248430073261261, + "eval_compot_IoU": 0.5791644155979156, + "eval_compot_MAE_all": 0.06817414425313473, + "eval_compot_MAE_h": 0.05497724376618862, + "eval_compot_MAE_w": 0.08069236949086189, + "eval_compot_MAE_x": 0.07930619642138481, + "eval_compot_MAE_y": 0.05772077850997448, + "eval_compot_NUM_probability": 0.9999851584434509, + "eval_compot_inside_bbox": 0.7204861044883728, + "eval_compot_loss": 0.24465377628803253, + "eval_compot_loss_ce": 0.021712005138397217, + "eval_compot_loss_iou": 0.4488525390625, + "eval_compot_loss_num": 0.041027069091796875, + "eval_compot_loss_xval": 0.2052154541015625, + "eval_compot_runtime": 99.4348, + "eval_compot_samples_per_second": 0.503, + "eval_compot_steps_per_second": 0.02, + "num_input_tokens_seen": 848395560, + "step": 8750 + }, + { + "epoch": 0.8554947203754399, + "loss": 0.19484098255634308, + "loss_ce": 0.022019941359758377, + "loss_iou": 0.455078125, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 848395560, + "step": 8750 + }, + { + "epoch": 0.8555924912006257, + "grad_norm": 3.612184319261686, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 848491948, + "step": 8751 + }, + { + "epoch": 0.8555924912006257, + "loss": 0.0629362016916275, + "loss_ce": 0.0037130266427993774, + "loss_iou": 0.1943359375, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 848491948, + "step": 8751 + }, + { + "epoch": 0.8556902620258114, + "grad_norm": 2.972063564051066, + "learning_rate": 5e-05, + "loss": 0.0419, + "num_input_tokens_seen": 848588748, + "step": 8752 + }, + { + "epoch": 0.8556902620258114, + "loss": 0.052017584443092346, + "loss_ce": 0.010818852111697197, + "loss_iou": 0.2333984375, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 848588748, + "step": 8752 + }, + { + "epoch": 0.8557880328509972, + "grad_norm": 11.83182748274025, + "learning_rate": 5e-05, + "loss": 0.0689, + "num_input_tokens_seen": 848686272, + "step": 8753 + }, + { + "epoch": 0.8557880328509972, + "loss": 0.060579001903533936, + "loss_ce": 0.004083334468305111, + "loss_iou": 0.26171875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 848686272, + "step": 8753 + }, + { + "epoch": 0.855885803676183, + "grad_norm": 13.959713463051733, + "learning_rate": 5e-05, + "loss": 0.0494, + "num_input_tokens_seen": 848784068, + "step": 8754 + }, + { + "epoch": 0.855885803676183, + "loss": 0.043743573129177094, + "loss_ce": 0.003391703125089407, + "loss_iou": 0.29296875, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 848784068, + "step": 8754 + }, + { + "epoch": 0.8559835745013687, + "grad_norm": 10.027445327310684, + "learning_rate": 5e-05, + "loss": 0.0695, + "num_input_tokens_seen": 848882444, + "step": 8755 + }, + { + "epoch": 0.8559835745013687, + "loss": 0.06959035992622375, + "loss_ce": 0.004496368579566479, + "loss_iou": 0.404296875, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 848882444, + "step": 8755 + }, + { + "epoch": 0.8560813453265546, + "grad_norm": 6.3474212653946065, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 848977364, + "step": 8756 + }, + { + "epoch": 0.8560813453265546, + "loss": 0.05283214896917343, + "loss_ce": 0.005224722903221846, + "loss_iou": 0.33984375, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 848977364, + "step": 8756 + }, + { + "epoch": 0.8561791161517404, + "grad_norm": 5.829255665512668, + "learning_rate": 5e-05, + "loss": 0.0461, + "num_input_tokens_seen": 849074080, + "step": 8757 + }, + { + "epoch": 0.8561791161517404, + "loss": 0.04506375640630722, + "loss_ce": 0.0024307002313435078, + "loss_iou": 0.236328125, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 849074080, + "step": 8757 + }, + { + "epoch": 0.856276886976926, + "grad_norm": 3.134478347638052, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 849171124, + "step": 8758 + }, + { + "epoch": 0.856276886976926, + "loss": 0.11575636267662048, + "loss_ce": 0.004702897276729345, + "loss_iou": 0.359375, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 849171124, + "step": 8758 + }, + { + "epoch": 0.8563746578021119, + "grad_norm": 14.226794095211842, + "learning_rate": 5e-05, + "loss": 0.0677, + "num_input_tokens_seen": 849268348, + "step": 8759 + }, + { + "epoch": 0.8563746578021119, + "loss": 0.09228301048278809, + "loss_ce": 0.007169488351792097, + "loss_iou": 0.255859375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 849268348, + "step": 8759 + }, + { + "epoch": 0.8564724286272977, + "grad_norm": 10.634285316370228, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 849366308, + "step": 8760 + }, + { + "epoch": 0.8564724286272977, + "loss": 0.10468477010726929, + "loss_ce": 0.004342964384704828, + "loss_iou": 0.265625, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 849366308, + "step": 8760 + }, + { + "epoch": 0.8565701994524834, + "grad_norm": 5.958954369064459, + "learning_rate": 5e-05, + "loss": 0.0537, + "num_input_tokens_seen": 849463044, + "step": 8761 + }, + { + "epoch": 0.8565701994524834, + "loss": 0.05418974161148071, + "loss_ce": 0.006521284114569426, + "loss_iou": 0.2294921875, + "loss_num": 0.00958251953125, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 849463044, + "step": 8761 + }, + { + "epoch": 0.8566679702776692, + "grad_norm": 5.751278741146232, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 849560284, + "step": 8762 + }, + { + "epoch": 0.8566679702776692, + "loss": 0.07408042997121811, + "loss_ce": 0.006895978003740311, + "loss_iou": 0.31640625, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 849560284, + "step": 8762 + }, + { + "epoch": 0.8567657411028549, + "grad_norm": 4.672225169096458, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 849656920, + "step": 8763 + }, + { + "epoch": 0.8567657411028549, + "loss": 0.09786097705364227, + "loss_ce": 0.007040667347609997, + "loss_iou": 0.2470703125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 849656920, + "step": 8763 + }, + { + "epoch": 0.8568635119280407, + "grad_norm": 9.177936815814101, + "learning_rate": 5e-05, + "loss": 0.0518, + "num_input_tokens_seen": 849754156, + "step": 8764 + }, + { + "epoch": 0.8568635119280407, + "loss": 0.05086611956357956, + "loss_ce": 0.007767672650516033, + "loss_iou": 0.30078125, + "loss_num": 0.00860595703125, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 849754156, + "step": 8764 + }, + { + "epoch": 0.8569612827532265, + "grad_norm": 13.638967502755987, + "learning_rate": 5e-05, + "loss": 0.0536, + "num_input_tokens_seen": 849851528, + "step": 8765 + }, + { + "epoch": 0.8569612827532265, + "loss": 0.06523449718952179, + "loss_ce": 0.0030091560911387205, + "loss_iou": 0.30859375, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 849851528, + "step": 8765 + }, + { + "epoch": 0.8570590535784122, + "grad_norm": 3.6580567722409136, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 849948104, + "step": 8766 + }, + { + "epoch": 0.8570590535784122, + "loss": 0.10263361781835556, + "loss_ce": 0.0016509522683918476, + "loss_iou": 0.330078125, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 849948104, + "step": 8766 + }, + { + "epoch": 0.857156824403598, + "grad_norm": 1.805173921845447, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 850044632, + "step": 8767 + }, + { + "epoch": 0.857156824403598, + "loss": 0.06264306604862213, + "loss_ce": 0.006742487661540508, + "loss_iou": 0.2412109375, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 850044632, + "step": 8767 + }, + { + "epoch": 0.8572545952287838, + "grad_norm": 1.7032617750407881, + "learning_rate": 5e-05, + "loss": 0.0551, + "num_input_tokens_seen": 850139964, + "step": 8768 + }, + { + "epoch": 0.8572545952287838, + "loss": 0.06374302506446838, + "loss_ce": 0.005553631577640772, + "loss_iou": 0.27734375, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 850139964, + "step": 8768 + }, + { + "epoch": 0.8573523660539695, + "grad_norm": 4.36485998092569, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 850235864, + "step": 8769 + }, + { + "epoch": 0.8573523660539695, + "loss": 0.059794776141643524, + "loss_ce": 0.005229346454143524, + "loss_iou": 0.244140625, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 850235864, + "step": 8769 + }, + { + "epoch": 0.8574501368791553, + "grad_norm": 11.57829813311015, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 850333052, + "step": 8770 + }, + { + "epoch": 0.8574501368791553, + "loss": 0.05720895528793335, + "loss_ce": 0.002780857030302286, + "loss_iou": 0.328125, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 850333052, + "step": 8770 + }, + { + "epoch": 0.857547907704341, + "grad_norm": 14.845930783434783, + "learning_rate": 5e-05, + "loss": 0.0469, + "num_input_tokens_seen": 850431016, + "step": 8771 + }, + { + "epoch": 0.857547907704341, + "loss": 0.03587077185511589, + "loss_ce": 0.005307418294250965, + "loss_iou": 0.3125, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 850431016, + "step": 8771 + }, + { + "epoch": 0.8576456785295268, + "grad_norm": 4.479396447186347, + "learning_rate": 5e-05, + "loss": 0.1116, + "num_input_tokens_seen": 850527320, + "step": 8772 + }, + { + "epoch": 0.8576456785295268, + "loss": 0.11658567190170288, + "loss_ce": 0.008072799071669579, + "loss_iou": 0.263671875, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 850527320, + "step": 8772 + }, + { + "epoch": 0.8577434493547126, + "grad_norm": 4.64586725708458, + "learning_rate": 5e-05, + "loss": 0.0463, + "num_input_tokens_seen": 850624248, + "step": 8773 + }, + { + "epoch": 0.8577434493547126, + "loss": 0.05147124081850052, + "loss_ce": 0.006316673010587692, + "loss_iou": 0.23828125, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 850624248, + "step": 8773 + }, + { + "epoch": 0.8578412201798983, + "grad_norm": 9.747337415255489, + "learning_rate": 5e-05, + "loss": 0.0677, + "num_input_tokens_seen": 850720928, + "step": 8774 + }, + { + "epoch": 0.8578412201798983, + "loss": 0.07735341787338257, + "loss_ce": 0.0028809942305088043, + "loss_iou": 0.3828125, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 850720928, + "step": 8774 + }, + { + "epoch": 0.8579389910050841, + "grad_norm": 4.910169947843357, + "learning_rate": 5e-05, + "loss": 0.0908, + "num_input_tokens_seen": 850817520, + "step": 8775 + }, + { + "epoch": 0.8579389910050841, + "loss": 0.13751405477523804, + "loss_ce": 0.009561484679579735, + "loss_iou": 0.271484375, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 850817520, + "step": 8775 + }, + { + "epoch": 0.8580367618302699, + "grad_norm": 7.352004221862012, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 850914328, + "step": 8776 + }, + { + "epoch": 0.8580367618302699, + "loss": 0.10325123369693756, + "loss_ce": 0.008768809027969837, + "loss_iou": 0.33203125, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 850914328, + "step": 8776 + }, + { + "epoch": 0.8581345326554556, + "grad_norm": 3.142779508312705, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 851011180, + "step": 8777 + }, + { + "epoch": 0.8581345326554556, + "loss": 0.08434230089187622, + "loss_ce": 0.00609523244202137, + "loss_iou": 0.31640625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 851011180, + "step": 8777 + }, + { + "epoch": 0.8582323034806414, + "grad_norm": 3.284577441831844, + "learning_rate": 5e-05, + "loss": 0.063, + "num_input_tokens_seen": 851107784, + "step": 8778 + }, + { + "epoch": 0.8582323034806414, + "loss": 0.07136158645153046, + "loss_ce": 0.0026131097692996264, + "loss_iou": 0.2060546875, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 851107784, + "step": 8778 + }, + { + "epoch": 0.8583300743058272, + "grad_norm": 4.238770923924417, + "learning_rate": 5e-05, + "loss": 0.0499, + "num_input_tokens_seen": 851205016, + "step": 8779 + }, + { + "epoch": 0.8583300743058272, + "loss": 0.054084762930870056, + "loss_ce": 0.0038146807346493006, + "loss_iou": 0.263671875, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 851205016, + "step": 8779 + }, + { + "epoch": 0.8584278451310129, + "grad_norm": 5.534892838222858, + "learning_rate": 5e-05, + "loss": 0.0459, + "num_input_tokens_seen": 851301684, + "step": 8780 + }, + { + "epoch": 0.8584278451310129, + "loss": 0.04799920320510864, + "loss_ce": 0.007181941531598568, + "loss_iou": 0.2060546875, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 851301684, + "step": 8780 + }, + { + "epoch": 0.8585256159561987, + "grad_norm": 8.791325835636876, + "learning_rate": 5e-05, + "loss": 0.0867, + "num_input_tokens_seen": 851398632, + "step": 8781 + }, + { + "epoch": 0.8585256159561987, + "loss": 0.0835215225815773, + "loss_ce": 0.004572548437863588, + "loss_iou": 0.353515625, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 851398632, + "step": 8781 + }, + { + "epoch": 0.8586233867813844, + "grad_norm": 38.29610214573742, + "learning_rate": 5e-05, + "loss": 0.0928, + "num_input_tokens_seen": 851496552, + "step": 8782 + }, + { + "epoch": 0.8586233867813844, + "loss": 0.05730713531374931, + "loss_ce": 0.00353516498580575, + "loss_iou": 0.3359375, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 851496552, + "step": 8782 + }, + { + "epoch": 0.8587211576065702, + "grad_norm": 11.76182387701799, + "learning_rate": 5e-05, + "loss": 0.0594, + "num_input_tokens_seen": 851593500, + "step": 8783 + }, + { + "epoch": 0.8587211576065702, + "loss": 0.08389414846897125, + "loss_ce": 0.005494491662830114, + "loss_iou": 0.306640625, + "loss_num": 0.015625, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 851593500, + "step": 8783 + }, + { + "epoch": 0.858818928431756, + "grad_norm": 24.921295059212497, + "learning_rate": 5e-05, + "loss": 0.1007, + "num_input_tokens_seen": 851690272, + "step": 8784 + }, + { + "epoch": 0.858818928431756, + "loss": 0.10571645945310593, + "loss_ce": 0.0051762983202934265, + "loss_iou": 0.251953125, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 851690272, + "step": 8784 + }, + { + "epoch": 0.8589166992569417, + "grad_norm": 1.9801616127619706, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 851787112, + "step": 8785 + }, + { + "epoch": 0.8589166992569417, + "loss": 0.07034885138273239, + "loss_ce": 0.006002532318234444, + "loss_iou": 0.228515625, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 851787112, + "step": 8785 + }, + { + "epoch": 0.8590144700821275, + "grad_norm": 4.123148737870575, + "learning_rate": 5e-05, + "loss": 0.106, + "num_input_tokens_seen": 851884192, + "step": 8786 + }, + { + "epoch": 0.8590144700821275, + "loss": 0.11288507282733917, + "loss_ce": 0.008400516584515572, + "loss_iou": 0.251953125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 851884192, + "step": 8786 + }, + { + "epoch": 0.8591122409073133, + "grad_norm": 11.588073477103217, + "learning_rate": 5e-05, + "loss": 0.1206, + "num_input_tokens_seen": 851979832, + "step": 8787 + }, + { + "epoch": 0.8591122409073133, + "loss": 0.12429195642471313, + "loss_ce": 0.008370937779545784, + "loss_iou": 0.3203125, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 851979832, + "step": 8787 + }, + { + "epoch": 0.859210011732499, + "grad_norm": 19.68195203616599, + "learning_rate": 5e-05, + "loss": 0.0508, + "num_input_tokens_seen": 852077224, + "step": 8788 + }, + { + "epoch": 0.859210011732499, + "loss": 0.061343323439359665, + "loss_ce": 0.010531553998589516, + "loss_iou": 0.353515625, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 852077224, + "step": 8788 + }, + { + "epoch": 0.8593077825576848, + "grad_norm": 9.404602154714782, + "learning_rate": 5e-05, + "loss": 0.0589, + "num_input_tokens_seen": 852174540, + "step": 8789 + }, + { + "epoch": 0.8593077825576848, + "loss": 0.06852402538061142, + "loss_ce": 0.004704141989350319, + "loss_iou": 0.3125, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 852174540, + "step": 8789 + }, + { + "epoch": 0.8594055533828705, + "grad_norm": 3.3326392625191086, + "learning_rate": 5e-05, + "loss": 0.0495, + "num_input_tokens_seen": 852271920, + "step": 8790 + }, + { + "epoch": 0.8594055533828705, + "loss": 0.03383655101060867, + "loss_ce": 0.004318425431847572, + "loss_iou": 0.291015625, + "loss_num": 0.00592041015625, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 852271920, + "step": 8790 + }, + { + "epoch": 0.8595033242080563, + "grad_norm": 59.38948164626637, + "learning_rate": 5e-05, + "loss": 0.0921, + "num_input_tokens_seen": 852368304, + "step": 8791 + }, + { + "epoch": 0.8595033242080563, + "loss": 0.08941228687763214, + "loss_ce": 0.004481866955757141, + "loss_iou": 0.314453125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 852368304, + "step": 8791 + }, + { + "epoch": 0.8596010950332421, + "grad_norm": 98.70699600121763, + "learning_rate": 5e-05, + "loss": 0.0976, + "num_input_tokens_seen": 852464708, + "step": 8792 + }, + { + "epoch": 0.8596010950332421, + "loss": 0.09851144254207611, + "loss_ce": 0.0077979383058846, + "loss_iou": 0.349609375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 852464708, + "step": 8792 + }, + { + "epoch": 0.8596988658584278, + "grad_norm": 10.357018267053267, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 852562760, + "step": 8793 + }, + { + "epoch": 0.8596988658584278, + "loss": 0.0763755589723587, + "loss_ce": 0.0012717947829514742, + "loss_iou": 0.33984375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 852562760, + "step": 8793 + }, + { + "epoch": 0.8597966366836136, + "grad_norm": 9.891374973246908, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 852660128, + "step": 8794 + }, + { + "epoch": 0.8597966366836136, + "loss": 0.07865098118782043, + "loss_ce": 0.006568466313183308, + "loss_iou": 0.32421875, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 852660128, + "step": 8794 + }, + { + "epoch": 0.8598944075087994, + "grad_norm": 4.726419924433726, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 852758516, + "step": 8795 + }, + { + "epoch": 0.8598944075087994, + "loss": 0.050602398812770844, + "loss_ce": 0.0048336610198020935, + "loss_iou": 0.392578125, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 852758516, + "step": 8795 + }, + { + "epoch": 0.8599921783339851, + "grad_norm": 8.045518411472587, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 852855036, + "step": 8796 + }, + { + "epoch": 0.8599921783339851, + "loss": 0.07206763327121735, + "loss_ce": 0.003982909955084324, + "loss_iou": 0.306640625, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 852855036, + "step": 8796 + }, + { + "epoch": 0.8600899491591709, + "grad_norm": 3.973729840912578, + "learning_rate": 5e-05, + "loss": 0.1101, + "num_input_tokens_seen": 852952224, + "step": 8797 + }, + { + "epoch": 0.8600899491591709, + "loss": 0.09579448401927948, + "loss_ce": 0.0041349465027451515, + "loss_iou": 0.251953125, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 852952224, + "step": 8797 + }, + { + "epoch": 0.8601877199843566, + "grad_norm": 2.526305054225633, + "learning_rate": 5e-05, + "loss": 0.0773, + "num_input_tokens_seen": 853049424, + "step": 8798 + }, + { + "epoch": 0.8601877199843566, + "loss": 0.0805857926607132, + "loss_ce": 0.007770857308059931, + "loss_iou": 0.28515625, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 853049424, + "step": 8798 + }, + { + "epoch": 0.8602854908095424, + "grad_norm": 8.962771204808627, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 853147204, + "step": 8799 + }, + { + "epoch": 0.8602854908095424, + "loss": 0.06451970338821411, + "loss_ce": 0.0033395919017493725, + "loss_iou": 0.298828125, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 853147204, + "step": 8799 + }, + { + "epoch": 0.8603832616347282, + "grad_norm": 3.5456963020990817, + "learning_rate": 5e-05, + "loss": 0.1099, + "num_input_tokens_seen": 853244284, + "step": 8800 + }, + { + "epoch": 0.8603832616347282, + "loss": 0.1204138770699501, + "loss_ce": 0.006873229052871466, + "loss_iou": 0.291015625, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 853244284, + "step": 8800 + }, + { + "epoch": 0.8604810324599139, + "grad_norm": 17.725267421426356, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 853341404, + "step": 8801 + }, + { + "epoch": 0.8604810324599139, + "loss": 0.08142496645450592, + "loss_ce": 0.005069991573691368, + "loss_iou": 0.423828125, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 853341404, + "step": 8801 + }, + { + "epoch": 0.8605788032850997, + "grad_norm": 11.324585865565183, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 853437784, + "step": 8802 + }, + { + "epoch": 0.8605788032850997, + "loss": 0.09600229561328888, + "loss_ce": 0.004541113972663879, + "loss_iou": 0.2109375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 853437784, + "step": 8802 + }, + { + "epoch": 0.8606765741102855, + "grad_norm": 4.395326040877941, + "learning_rate": 5e-05, + "loss": 0.0893, + "num_input_tokens_seen": 853535056, + "step": 8803 + }, + { + "epoch": 0.8606765741102855, + "loss": 0.09591907262802124, + "loss_ce": 0.005037728231400251, + "loss_iou": 0.3515625, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 853535056, + "step": 8803 + }, + { + "epoch": 0.8607743449354712, + "grad_norm": 23.859312230411057, + "learning_rate": 5e-05, + "loss": 0.1008, + "num_input_tokens_seen": 853631820, + "step": 8804 + }, + { + "epoch": 0.8607743449354712, + "loss": 0.10050596296787262, + "loss_ce": 0.0047418042086064816, + "loss_iou": 0.3203125, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 853631820, + "step": 8804 + }, + { + "epoch": 0.860872115760657, + "grad_norm": 6.922799082738024, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 853728540, + "step": 8805 + }, + { + "epoch": 0.860872115760657, + "loss": 0.047812286764383316, + "loss_ce": 0.0048988498747348785, + "loss_iou": 0.27734375, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 853728540, + "step": 8805 + }, + { + "epoch": 0.8609698865858428, + "grad_norm": 8.639198938925901, + "learning_rate": 5e-05, + "loss": 0.0682, + "num_input_tokens_seen": 853826520, + "step": 8806 + }, + { + "epoch": 0.8609698865858428, + "loss": 0.08130506426095963, + "loss_ce": 0.0058465376496315, + "loss_iou": 0.28125, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 853826520, + "step": 8806 + }, + { + "epoch": 0.8610676574110285, + "grad_norm": 36.95588298345754, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 853923176, + "step": 8807 + }, + { + "epoch": 0.8610676574110285, + "loss": 0.09749840199947357, + "loss_ce": 0.007761464454233646, + "loss_iou": 0.306640625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 853923176, + "step": 8807 + }, + { + "epoch": 0.8611654282362143, + "grad_norm": 10.10983827611461, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 854020080, + "step": 8808 + }, + { + "epoch": 0.8611654282362143, + "loss": 0.08718624711036682, + "loss_ce": 0.006806760095059872, + "loss_iou": 0.26171875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 854020080, + "step": 8808 + }, + { + "epoch": 0.8612631990614, + "grad_norm": 2.702664258152939, + "learning_rate": 5e-05, + "loss": 0.055, + "num_input_tokens_seen": 854117252, + "step": 8809 + }, + { + "epoch": 0.8612631990614, + "loss": 0.06179951876401901, + "loss_ce": 0.0039687068201601505, + "loss_iou": 0.25, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 854117252, + "step": 8809 + }, + { + "epoch": 0.8613609698865858, + "grad_norm": 18.182174060034068, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 854213780, + "step": 8810 + }, + { + "epoch": 0.8613609698865858, + "loss": 0.11617644131183624, + "loss_ce": 0.0060384999960660934, + "loss_iou": 0.3125, + "loss_num": 0.02197265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 854213780, + "step": 8810 + }, + { + "epoch": 0.8614587407117716, + "grad_norm": 4.510247280997851, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 854311128, + "step": 8811 + }, + { + "epoch": 0.8614587407117716, + "loss": 0.1132652536034584, + "loss_ce": 0.0077049522660672665, + "loss_iou": 0.25390625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 854311128, + "step": 8811 + }, + { + "epoch": 0.8615565115369573, + "grad_norm": 3.826603926190169, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 854408520, + "step": 8812 + }, + { + "epoch": 0.8615565115369573, + "loss": 0.1276341676712036, + "loss_ce": 0.007883181795477867, + "loss_iou": 0.26953125, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 854408520, + "step": 8812 + }, + { + "epoch": 0.8616542823621431, + "grad_norm": 4.610964578480915, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 854504792, + "step": 8813 + }, + { + "epoch": 0.8616542823621431, + "loss": 0.0880516767501831, + "loss_ce": 0.017952794209122658, + "loss_iou": 0.302734375, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 854504792, + "step": 8813 + }, + { + "epoch": 0.861752053187329, + "grad_norm": 5.95176797270309, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 854601360, + "step": 8814 + }, + { + "epoch": 0.861752053187329, + "loss": 0.06152158975601196, + "loss_ce": 0.006143628153949976, + "loss_iou": 0.181640625, + "loss_num": 0.01104736328125, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 854601360, + "step": 8814 + }, + { + "epoch": 0.8618498240125146, + "grad_norm": 2.5001291024751544, + "learning_rate": 5e-05, + "loss": 0.0913, + "num_input_tokens_seen": 854698000, + "step": 8815 + }, + { + "epoch": 0.8618498240125146, + "loss": 0.11256478726863861, + "loss_ce": 0.004288424737751484, + "loss_iou": 0.2734375, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 854698000, + "step": 8815 + }, + { + "epoch": 0.8619475948377004, + "grad_norm": 7.965815283637051, + "learning_rate": 5e-05, + "loss": 0.052, + "num_input_tokens_seen": 854794780, + "step": 8816 + }, + { + "epoch": 0.8619475948377004, + "loss": 0.054973624646663666, + "loss_ce": 0.006458302028477192, + "loss_iou": 0.20703125, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 854794780, + "step": 8816 + }, + { + "epoch": 0.8620453656628861, + "grad_norm": 10.281677537765892, + "learning_rate": 5e-05, + "loss": 0.0718, + "num_input_tokens_seen": 854892268, + "step": 8817 + }, + { + "epoch": 0.8620453656628861, + "loss": 0.06415624916553497, + "loss_ce": 0.0035941204987466335, + "loss_iou": 0.34765625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 854892268, + "step": 8817 + }, + { + "epoch": 0.862143136488072, + "grad_norm": 6.811911011141052, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 854989112, + "step": 8818 + }, + { + "epoch": 0.862143136488072, + "loss": 0.07605060935020447, + "loss_ce": 0.006897783372551203, + "loss_iou": 0.2080078125, + "loss_num": 0.0137939453125, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 854989112, + "step": 8818 + }, + { + "epoch": 0.8622409073132578, + "grad_norm": 3.026322863986386, + "learning_rate": 5e-05, + "loss": 0.0769, + "num_input_tokens_seen": 855085560, + "step": 8819 + }, + { + "epoch": 0.8622409073132578, + "loss": 0.06198666989803314, + "loss_ce": 0.003171667456626892, + "loss_iou": 0.2138671875, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 855085560, + "step": 8819 + }, + { + "epoch": 0.8623386781384434, + "grad_norm": 11.535561299486048, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 855182100, + "step": 8820 + }, + { + "epoch": 0.8623386781384434, + "loss": 0.10525618493556976, + "loss_ce": 0.010377030819654465, + "loss_iou": 0.30859375, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 855182100, + "step": 8820 + }, + { + "epoch": 0.8624364489636293, + "grad_norm": 17.70753262844355, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 855279244, + "step": 8821 + }, + { + "epoch": 0.8624364489636293, + "loss": 0.06476718187332153, + "loss_ce": 0.004891698248684406, + "loss_iou": 0.337890625, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 855279244, + "step": 8821 + }, + { + "epoch": 0.8625342197888151, + "grad_norm": 7.324579141455046, + "learning_rate": 5e-05, + "loss": 0.0431, + "num_input_tokens_seen": 855376872, + "step": 8822 + }, + { + "epoch": 0.8625342197888151, + "loss": 0.05680962651968002, + "loss_ce": 0.003556454787030816, + "loss_iou": 0.3203125, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 855376872, + "step": 8822 + }, + { + "epoch": 0.8626319906140008, + "grad_norm": 8.621384435448913, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 855474248, + "step": 8823 + }, + { + "epoch": 0.8626319906140008, + "loss": 0.07094405591487885, + "loss_ce": 0.004842987284064293, + "loss_iou": 0.248046875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 855474248, + "step": 8823 + }, + { + "epoch": 0.8627297614391866, + "grad_norm": 2.851810780217878, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 855571680, + "step": 8824 + }, + { + "epoch": 0.8627297614391866, + "loss": 0.07976128160953522, + "loss_ce": 0.005634085275232792, + "loss_iou": 0.345703125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 855571680, + "step": 8824 + }, + { + "epoch": 0.8628275322643724, + "grad_norm": 3.205420276769842, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 855667740, + "step": 8825 + }, + { + "epoch": 0.8628275322643724, + "loss": 0.06251394003629684, + "loss_ce": 0.005888574756681919, + "loss_iou": 0.232421875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 855667740, + "step": 8825 + }, + { + "epoch": 0.8629253030895581, + "grad_norm": 7.881871406633035, + "learning_rate": 5e-05, + "loss": 0.112, + "num_input_tokens_seen": 855764916, + "step": 8826 + }, + { + "epoch": 0.8629253030895581, + "loss": 0.12916812300682068, + "loss_ce": 0.008860202506184578, + "loss_iou": 0.32421875, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 855764916, + "step": 8826 + }, + { + "epoch": 0.8630230739147439, + "grad_norm": 40.34214619807114, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 855862316, + "step": 8827 + }, + { + "epoch": 0.8630230739147439, + "loss": 0.09598714113235474, + "loss_ce": 0.003976641222834587, + "loss_iou": 0.34765625, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 855862316, + "step": 8827 + }, + { + "epoch": 0.8631208447399296, + "grad_norm": 18.440023929278958, + "learning_rate": 5e-05, + "loss": 0.0644, + "num_input_tokens_seen": 855958540, + "step": 8828 + }, + { + "epoch": 0.8631208447399296, + "loss": 0.04584251344203949, + "loss_ce": 0.0033162683248519897, + "loss_iou": 0.287109375, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 855958540, + "step": 8828 + }, + { + "epoch": 0.8632186155651154, + "grad_norm": 5.978398272372801, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 856055100, + "step": 8829 + }, + { + "epoch": 0.8632186155651154, + "loss": 0.07736404240131378, + "loss_ce": 0.006723480299115181, + "loss_iou": 0.2734375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 856055100, + "step": 8829 + }, + { + "epoch": 0.8633163863903012, + "grad_norm": 13.624975837734997, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 856153388, + "step": 8830 + }, + { + "epoch": 0.8633163863903012, + "loss": 0.07653515040874481, + "loss_ce": 0.0061616189777851105, + "loss_iou": 0.408203125, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 856153388, + "step": 8830 + }, + { + "epoch": 0.8634141572154869, + "grad_norm": 9.23571012128028, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 856250240, + "step": 8831 + }, + { + "epoch": 0.8634141572154869, + "loss": 0.07878361642360687, + "loss_ce": 0.006060222629457712, + "loss_iou": 0.2890625, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 856250240, + "step": 8831 + }, + { + "epoch": 0.8635119280406727, + "grad_norm": 51.77513524566762, + "learning_rate": 5e-05, + "loss": 0.0682, + "num_input_tokens_seen": 856347280, + "step": 8832 + }, + { + "epoch": 0.8635119280406727, + "loss": 0.05205933004617691, + "loss_ce": 0.006738821044564247, + "loss_iou": 0.263671875, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 856347280, + "step": 8832 + }, + { + "epoch": 0.8636096988658585, + "grad_norm": 12.606349221903603, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 856443208, + "step": 8833 + }, + { + "epoch": 0.8636096988658585, + "loss": 0.0504305474460125, + "loss_ce": 0.005565892439335585, + "loss_iou": 0.255859375, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 856443208, + "step": 8833 + }, + { + "epoch": 0.8637074696910442, + "grad_norm": 9.596308573635195, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 856540944, + "step": 8834 + }, + { + "epoch": 0.8637074696910442, + "loss": 0.06007145717740059, + "loss_ce": 0.006825912743806839, + "loss_iou": 0.279296875, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 856540944, + "step": 8834 + }, + { + "epoch": 0.86380524051623, + "grad_norm": 5.894087416558894, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 856637424, + "step": 8835 + }, + { + "epoch": 0.86380524051623, + "loss": 0.050231464207172394, + "loss_ce": 0.006835466250777245, + "loss_iou": 0.234375, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 856637424, + "step": 8835 + }, + { + "epoch": 0.8639030113414157, + "grad_norm": 3.193712128203744, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 856734300, + "step": 8836 + }, + { + "epoch": 0.8639030113414157, + "loss": 0.06584549695253372, + "loss_ce": 0.006381997372955084, + "loss_iou": 0.228515625, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 856734300, + "step": 8836 + }, + { + "epoch": 0.8640007821666015, + "grad_norm": 2.599168844554362, + "learning_rate": 5e-05, + "loss": 0.0537, + "num_input_tokens_seen": 856831180, + "step": 8837 + }, + { + "epoch": 0.8640007821666015, + "loss": 0.06179584935307503, + "loss_ce": 0.010244028642773628, + "loss_iou": 0.2470703125, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 856831180, + "step": 8837 + }, + { + "epoch": 0.8640985529917873, + "grad_norm": 7.978114441756608, + "learning_rate": 5e-05, + "loss": 0.0961, + "num_input_tokens_seen": 856927952, + "step": 8838 + }, + { + "epoch": 0.8640985529917873, + "loss": 0.10212169587612152, + "loss_ce": 0.007791861426085234, + "loss_iou": 0.359375, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 856927952, + "step": 8838 + }, + { + "epoch": 0.864196323816973, + "grad_norm": 29.83993390798928, + "learning_rate": 5e-05, + "loss": 0.0714, + "num_input_tokens_seen": 857024740, + "step": 8839 + }, + { + "epoch": 0.864196323816973, + "loss": 0.04989108443260193, + "loss_ce": 0.0025659503880888224, + "loss_iou": 0.30078125, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 857024740, + "step": 8839 + }, + { + "epoch": 0.8642940946421588, + "grad_norm": 5.6363332789612794, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 857120956, + "step": 8840 + }, + { + "epoch": 0.8642940946421588, + "loss": 0.0819326639175415, + "loss_ce": 0.008881209418177605, + "loss_iou": 0.21484375, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 857120956, + "step": 8840 + }, + { + "epoch": 0.8643918654673446, + "grad_norm": 54.040629786520086, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 857217588, + "step": 8841 + }, + { + "epoch": 0.8643918654673446, + "loss": 0.08653055131435394, + "loss_ce": 0.006101482547819614, + "loss_iou": 0.341796875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 857217588, + "step": 8841 + }, + { + "epoch": 0.8644896362925303, + "grad_norm": 11.121232589003668, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 857314004, + "step": 8842 + }, + { + "epoch": 0.8644896362925303, + "loss": 0.05840262025594711, + "loss_ce": 0.007796848192811012, + "loss_iou": 0.267578125, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 857314004, + "step": 8842 + }, + { + "epoch": 0.8645874071177161, + "grad_norm": 4.727672525360356, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 857410808, + "step": 8843 + }, + { + "epoch": 0.8645874071177161, + "loss": 0.0545298233628273, + "loss_ce": 0.007021581754088402, + "loss_iou": 0.25390625, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 857410808, + "step": 8843 + }, + { + "epoch": 0.8646851779429018, + "grad_norm": 4.988621575800016, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 857507972, + "step": 8844 + }, + { + "epoch": 0.8646851779429018, + "loss": 0.11707168817520142, + "loss_ce": 0.008062899112701416, + "loss_iou": 0.43359375, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 857507972, + "step": 8844 + }, + { + "epoch": 0.8647829487680876, + "grad_norm": 4.547481694339761, + "learning_rate": 5e-05, + "loss": 0.107, + "num_input_tokens_seen": 857604496, + "step": 8845 + }, + { + "epoch": 0.8647829487680876, + "loss": 0.11020933091640472, + "loss_ce": 0.00915418192744255, + "loss_iou": 0.388671875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 857604496, + "step": 8845 + }, + { + "epoch": 0.8648807195932734, + "grad_norm": 3.4389780213664753, + "learning_rate": 5e-05, + "loss": 0.0685, + "num_input_tokens_seen": 857701036, + "step": 8846 + }, + { + "epoch": 0.8648807195932734, + "loss": 0.07387691736221313, + "loss_ce": 0.0021033938974142075, + "loss_iou": 0.23828125, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 857701036, + "step": 8846 + }, + { + "epoch": 0.8649784904184591, + "grad_norm": 6.392832393898762, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 857797360, + "step": 8847 + }, + { + "epoch": 0.8649784904184591, + "loss": 0.07761919498443604, + "loss_ce": 0.0057245478965342045, + "loss_iou": 0.166015625, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 857797360, + "step": 8847 + }, + { + "epoch": 0.8650762612436449, + "grad_norm": 5.088651066167768, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 857894168, + "step": 8848 + }, + { + "epoch": 0.8650762612436449, + "loss": 0.07632161676883698, + "loss_ce": 0.006344811990857124, + "loss_iou": 0.349609375, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 857894168, + "step": 8848 + }, + { + "epoch": 0.8651740320688307, + "grad_norm": 4.8244205226078245, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 857990872, + "step": 8849 + }, + { + "epoch": 0.8651740320688307, + "loss": 0.06605629622936249, + "loss_ce": 0.009621668606996536, + "loss_iou": 0.265625, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 857990872, + "step": 8849 + }, + { + "epoch": 0.8652718028940164, + "grad_norm": 12.717364734691905, + "learning_rate": 5e-05, + "loss": 0.09, + "num_input_tokens_seen": 858087856, + "step": 8850 + }, + { + "epoch": 0.8652718028940164, + "loss": 0.07860088348388672, + "loss_ce": 0.005206110887229443, + "loss_iou": 0.458984375, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 858087856, + "step": 8850 + }, + { + "epoch": 0.8653695737192022, + "grad_norm": 19.730371900418785, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 858184720, + "step": 8851 + }, + { + "epoch": 0.8653695737192022, + "loss": 0.08872497826814651, + "loss_ce": 0.010450253263115883, + "loss_iou": 0.2158203125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 858184720, + "step": 8851 + }, + { + "epoch": 0.865467344544388, + "grad_norm": 3.576909432487042, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 858282744, + "step": 8852 + }, + { + "epoch": 0.865467344544388, + "loss": 0.05432906001806259, + "loss_ce": 0.003204488195478916, + "loss_iou": 0.29296875, + "loss_num": 0.01025390625, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 858282744, + "step": 8852 + }, + { + "epoch": 0.8655651153695737, + "grad_norm": 4.751463825825742, + "learning_rate": 5e-05, + "loss": 0.1059, + "num_input_tokens_seen": 858379412, + "step": 8853 + }, + { + "epoch": 0.8655651153695737, + "loss": 0.11678184568881989, + "loss_ce": 0.009283676743507385, + "loss_iou": 0.314453125, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 858379412, + "step": 8853 + }, + { + "epoch": 0.8656628861947595, + "grad_norm": 5.844203800408145, + "learning_rate": 5e-05, + "loss": 0.0582, + "num_input_tokens_seen": 858475852, + "step": 8854 + }, + { + "epoch": 0.8656628861947595, + "loss": 0.06337878108024597, + "loss_ce": 0.005837888456881046, + "loss_iou": 0.279296875, + "loss_num": 0.011474609375, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 858475852, + "step": 8854 + }, + { + "epoch": 0.8657606570199452, + "grad_norm": 12.381904762924467, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 858572100, + "step": 8855 + }, + { + "epoch": 0.8657606570199452, + "loss": 0.04679363965988159, + "loss_ce": 0.00338238338008523, + "loss_iou": 0.255859375, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 858572100, + "step": 8855 + }, + { + "epoch": 0.865858427845131, + "grad_norm": 20.58229116417094, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 858669112, + "step": 8856 + }, + { + "epoch": 0.865858427845131, + "loss": 0.12525545060634613, + "loss_ce": 0.004424921702593565, + "loss_iou": 0.271484375, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 858669112, + "step": 8856 + }, + { + "epoch": 0.8659561986703168, + "grad_norm": 2.543603336708374, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 858765952, + "step": 8857 + }, + { + "epoch": 0.8659561986703168, + "loss": 0.10722238570451736, + "loss_ce": 0.007368863560259342, + "loss_iou": 0.384765625, + "loss_num": 0.02001953125, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 858765952, + "step": 8857 + }, + { + "epoch": 0.8660539694955025, + "grad_norm": 2.9837178055750364, + "learning_rate": 5e-05, + "loss": 0.0691, + "num_input_tokens_seen": 858862472, + "step": 8858 + }, + { + "epoch": 0.8660539694955025, + "loss": 0.06911292672157288, + "loss_ce": 0.0018521868623793125, + "loss_iou": 0.283203125, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 858862472, + "step": 8858 + }, + { + "epoch": 0.8661517403206883, + "grad_norm": 4.362448758012889, + "learning_rate": 5e-05, + "loss": 0.0846, + "num_input_tokens_seen": 858959656, + "step": 8859 + }, + { + "epoch": 0.8661517403206883, + "loss": 0.10552184283733368, + "loss_ce": 0.008926081471145153, + "loss_iou": 0.236328125, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 858959656, + "step": 8859 + }, + { + "epoch": 0.8662495111458741, + "grad_norm": 9.834962740346091, + "learning_rate": 5e-05, + "loss": 0.1284, + "num_input_tokens_seen": 859055236, + "step": 8860 + }, + { + "epoch": 0.8662495111458741, + "loss": 0.11006797850131989, + "loss_ce": 0.0073763346299529076, + "loss_iou": 0.26953125, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 859055236, + "step": 8860 + }, + { + "epoch": 0.8663472819710598, + "grad_norm": 4.926266708647913, + "learning_rate": 5e-05, + "loss": 0.1146, + "num_input_tokens_seen": 859151796, + "step": 8861 + }, + { + "epoch": 0.8663472819710598, + "loss": 0.11088043451309204, + "loss_ce": 0.004946291446685791, + "loss_iou": 0.322265625, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 859151796, + "step": 8861 + }, + { + "epoch": 0.8664450527962456, + "grad_norm": 4.830287105779385, + "learning_rate": 5e-05, + "loss": 0.0529, + "num_input_tokens_seen": 859249036, + "step": 8862 + }, + { + "epoch": 0.8664450527962456, + "loss": 0.06613849848508835, + "loss_ce": 0.0070183165371418, + "loss_iou": 0.302734375, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 859249036, + "step": 8862 + }, + { + "epoch": 0.8665428236214313, + "grad_norm": 15.433814430343721, + "learning_rate": 5e-05, + "loss": 0.0745, + "num_input_tokens_seen": 859346716, + "step": 8863 + }, + { + "epoch": 0.8665428236214313, + "loss": 0.059177465736866, + "loss_ce": 0.0031548193655908108, + "loss_iou": 0.314453125, + "loss_num": 0.01123046875, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 859346716, + "step": 8863 + }, + { + "epoch": 0.8666405944466171, + "grad_norm": 18.25713982365722, + "learning_rate": 5e-05, + "loss": 0.0917, + "num_input_tokens_seen": 859443692, + "step": 8864 + }, + { + "epoch": 0.8666405944466171, + "loss": 0.11790577322244644, + "loss_ce": 0.004075206350535154, + "loss_iou": 0.2734375, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 859443692, + "step": 8864 + }, + { + "epoch": 0.8667383652718029, + "grad_norm": 4.358567579516165, + "learning_rate": 5e-05, + "loss": 0.0964, + "num_input_tokens_seen": 859540088, + "step": 8865 + }, + { + "epoch": 0.8667383652718029, + "loss": 0.0766805112361908, + "loss_ce": 0.0059560188092291355, + "loss_iou": 0.3984375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 859540088, + "step": 8865 + }, + { + "epoch": 0.8668361360969886, + "grad_norm": 8.834506375040535, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 859636924, + "step": 8866 + }, + { + "epoch": 0.8668361360969886, + "loss": 0.06300298869609833, + "loss_ce": 0.0041345772333443165, + "loss_iou": 0.296875, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 859636924, + "step": 8866 + }, + { + "epoch": 0.8669339069221744, + "grad_norm": 7.115071781023071, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 859734048, + "step": 8867 + }, + { + "epoch": 0.8669339069221744, + "loss": 0.08408118784427643, + "loss_ce": 0.005986703559756279, + "loss_iou": 0.361328125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 859734048, + "step": 8867 + }, + { + "epoch": 0.8670316777473602, + "grad_norm": 13.048216162396018, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 859831200, + "step": 8868 + }, + { + "epoch": 0.8670316777473602, + "loss": 0.06735426187515259, + "loss_ce": 0.0038471785373985767, + "loss_iou": 0.248046875, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 859831200, + "step": 8868 + }, + { + "epoch": 0.8671294485725459, + "grad_norm": 3.366208428136222, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 859927080, + "step": 8869 + }, + { + "epoch": 0.8671294485725459, + "loss": 0.04066641256213188, + "loss_ce": 0.0010393382981419563, + "loss_iou": 0.298828125, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 859927080, + "step": 8869 + }, + { + "epoch": 0.8672272193977317, + "grad_norm": 6.131393485987489, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 860023840, + "step": 8870 + }, + { + "epoch": 0.8672272193977317, + "loss": 0.067778080701828, + "loss_ce": 0.0032181446440517902, + "loss_iou": 0.2197265625, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 860023840, + "step": 8870 + }, + { + "epoch": 0.8673249902229175, + "grad_norm": 4.623472922027223, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 860120336, + "step": 8871 + }, + { + "epoch": 0.8673249902229175, + "loss": 0.06019941717386246, + "loss_ce": 0.0031086583621799946, + "loss_iou": 0.1923828125, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 860120336, + "step": 8871 + }, + { + "epoch": 0.8674227610481032, + "grad_norm": 5.730197590713022, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 860216588, + "step": 8872 + }, + { + "epoch": 0.8674227610481032, + "loss": 0.08383374661207199, + "loss_ce": 0.00770002044737339, + "loss_iou": 0.23828125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 860216588, + "step": 8872 + }, + { + "epoch": 0.867520531873289, + "grad_norm": 3.817763146441393, + "learning_rate": 5e-05, + "loss": 0.0877, + "num_input_tokens_seen": 860312976, + "step": 8873 + }, + { + "epoch": 0.867520531873289, + "loss": 0.08835314214229584, + "loss_ce": 0.006348602473735809, + "loss_iou": 0.287109375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 860312976, + "step": 8873 + }, + { + "epoch": 0.8676183026984747, + "grad_norm": 12.956726763974475, + "learning_rate": 5e-05, + "loss": 0.0964, + "num_input_tokens_seen": 860410172, + "step": 8874 + }, + { + "epoch": 0.8676183026984747, + "loss": 0.09126290678977966, + "loss_ce": 0.0054169632494449615, + "loss_iou": 0.341796875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 860410172, + "step": 8874 + }, + { + "epoch": 0.8677160735236605, + "grad_norm": 12.21353708809938, + "learning_rate": 5e-05, + "loss": 0.1168, + "num_input_tokens_seen": 860506724, + "step": 8875 + }, + { + "epoch": 0.8677160735236605, + "loss": 0.11379285901784897, + "loss_ce": 0.004326304886490107, + "loss_iou": 0.30078125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 860506724, + "step": 8875 + }, + { + "epoch": 0.8678138443488463, + "grad_norm": 8.00966414906836, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 860603592, + "step": 8876 + }, + { + "epoch": 0.8678138443488463, + "loss": 0.09351968765258789, + "loss_ce": 0.0058579458855092525, + "loss_iou": 0.353515625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 860603592, + "step": 8876 + }, + { + "epoch": 0.867911615174032, + "grad_norm": 2.80179211687294, + "learning_rate": 5e-05, + "loss": 0.0577, + "num_input_tokens_seen": 860701228, + "step": 8877 + }, + { + "epoch": 0.867911615174032, + "loss": 0.04243544116616249, + "loss_ce": 0.009934219531714916, + "loss_iou": 0.306640625, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 860701228, + "step": 8877 + }, + { + "epoch": 0.8680093859992178, + "grad_norm": 8.666050646065939, + "learning_rate": 5e-05, + "loss": 0.1303, + "num_input_tokens_seen": 860797604, + "step": 8878 + }, + { + "epoch": 0.8680093859992178, + "loss": 0.12690797448158264, + "loss_ce": 0.006813685409724712, + "loss_iou": 0.2197265625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 860797604, + "step": 8878 + }, + { + "epoch": 0.8681071568244036, + "grad_norm": 3.646095941205027, + "learning_rate": 5e-05, + "loss": 0.0539, + "num_input_tokens_seen": 860894668, + "step": 8879 + }, + { + "epoch": 0.8681071568244036, + "loss": 0.062637098133564, + "loss_ce": 0.006524811033159494, + "loss_iou": 0.171875, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 860894668, + "step": 8879 + }, + { + "epoch": 0.8682049276495893, + "grad_norm": 8.007739094723908, + "learning_rate": 5e-05, + "loss": 0.067, + "num_input_tokens_seen": 860991624, + "step": 8880 + }, + { + "epoch": 0.8682049276495893, + "loss": 0.04773154482245445, + "loss_ce": 0.007021098863333464, + "loss_iou": 0.2119140625, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 860991624, + "step": 8880 + }, + { + "epoch": 0.8683026984747751, + "grad_norm": 4.325324934970058, + "learning_rate": 5e-05, + "loss": 0.0485, + "num_input_tokens_seen": 861088084, + "step": 8881 + }, + { + "epoch": 0.8683026984747751, + "loss": 0.05030375346541405, + "loss_ce": 0.009639079682528973, + "loss_iou": 0.310546875, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 861088084, + "step": 8881 + }, + { + "epoch": 0.8684004692999608, + "grad_norm": 4.183422302737446, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 861185032, + "step": 8882 + }, + { + "epoch": 0.8684004692999608, + "loss": 0.06366795301437378, + "loss_ce": 0.006569557823240757, + "loss_iou": 0.259765625, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 861185032, + "step": 8882 + }, + { + "epoch": 0.8684982401251466, + "grad_norm": 4.174758896860805, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 861281488, + "step": 8883 + }, + { + "epoch": 0.8684982401251466, + "loss": 0.07778257131576538, + "loss_ce": 0.00428098626434803, + "loss_iou": 0.265625, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 861281488, + "step": 8883 + }, + { + "epoch": 0.8685960109503325, + "grad_norm": 2.858723056604767, + "learning_rate": 5e-05, + "loss": 0.0572, + "num_input_tokens_seen": 861378416, + "step": 8884 + }, + { + "epoch": 0.8685960109503325, + "loss": 0.06631401181221008, + "loss_ce": 0.004729539155960083, + "loss_iou": 0.2021484375, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 861378416, + "step": 8884 + }, + { + "epoch": 0.8686937817755181, + "grad_norm": 7.4268289388794475, + "learning_rate": 5e-05, + "loss": 0.0681, + "num_input_tokens_seen": 861474536, + "step": 8885 + }, + { + "epoch": 0.8686937817755181, + "loss": 0.08121702075004578, + "loss_ce": 0.005731792654842138, + "loss_iou": 0.251953125, + "loss_num": 0.01507568359375, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 861474536, + "step": 8885 + }, + { + "epoch": 0.868791552600704, + "grad_norm": 10.211286637578159, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 861571532, + "step": 8886 + }, + { + "epoch": 0.868791552600704, + "loss": 0.045089803636074066, + "loss_ce": 0.010665974579751492, + "loss_iou": 0.236328125, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 861571532, + "step": 8886 + }, + { + "epoch": 0.8688893234258898, + "grad_norm": 7.499635974991916, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 861667140, + "step": 8887 + }, + { + "epoch": 0.8688893234258898, + "loss": 0.06519471108913422, + "loss_ce": 0.0068527329713106155, + "loss_iou": 0.2041015625, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 861667140, + "step": 8887 + }, + { + "epoch": 0.8689870942510755, + "grad_norm": 2.53539443833173, + "learning_rate": 5e-05, + "loss": 0.0571, + "num_input_tokens_seen": 861763168, + "step": 8888 + }, + { + "epoch": 0.8689870942510755, + "loss": 0.048251762986183167, + "loss_ce": 0.004886283073574305, + "loss_iou": 0.2890625, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 861763168, + "step": 8888 + }, + { + "epoch": 0.8690848650762613, + "grad_norm": 10.23049165661062, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 861860276, + "step": 8889 + }, + { + "epoch": 0.8690848650762613, + "loss": 0.04421747103333473, + "loss_ce": 0.004773503169417381, + "loss_iou": 0.306640625, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 861860276, + "step": 8889 + }, + { + "epoch": 0.869182635901447, + "grad_norm": 7.086849643313755, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 861957312, + "step": 8890 + }, + { + "epoch": 0.869182635901447, + "loss": 0.05204709619283676, + "loss_ce": 0.007888163439929485, + "loss_iou": 0.26171875, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 861957312, + "step": 8890 + }, + { + "epoch": 0.8692804067266328, + "grad_norm": 4.818129312338462, + "learning_rate": 5e-05, + "loss": 0.0906, + "num_input_tokens_seen": 862053816, + "step": 8891 + }, + { + "epoch": 0.8692804067266328, + "loss": 0.07359469681978226, + "loss_ce": 0.007249483838677406, + "loss_iou": 0.32421875, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 862053816, + "step": 8891 + }, + { + "epoch": 0.8693781775518186, + "grad_norm": 3.4977065689662976, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 862150128, + "step": 8892 + }, + { + "epoch": 0.8693781775518186, + "loss": 0.06765037775039673, + "loss_ce": 0.003044664394110441, + "loss_iou": 0.24609375, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 862150128, + "step": 8892 + }, + { + "epoch": 0.8694759483770043, + "grad_norm": 7.87446035977523, + "learning_rate": 5e-05, + "loss": 0.051, + "num_input_tokens_seen": 862246984, + "step": 8893 + }, + { + "epoch": 0.8694759483770043, + "loss": 0.04542948305606842, + "loss_ce": 0.002628576010465622, + "loss_iou": 0.419921875, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 862246984, + "step": 8893 + }, + { + "epoch": 0.8695737192021901, + "grad_norm": 3.838954006194888, + "learning_rate": 5e-05, + "loss": 0.0695, + "num_input_tokens_seen": 862343676, + "step": 8894 + }, + { + "epoch": 0.8695737192021901, + "loss": 0.05384324491024017, + "loss_ce": 0.005114302039146423, + "loss_iou": 0.37109375, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 862343676, + "step": 8894 + }, + { + "epoch": 0.8696714900273759, + "grad_norm": 7.354373432846824, + "learning_rate": 5e-05, + "loss": 0.1077, + "num_input_tokens_seen": 862440348, + "step": 8895 + }, + { + "epoch": 0.8696714900273759, + "loss": 0.06934210658073425, + "loss_ce": 0.006094423588365316, + "loss_iou": 0.287109375, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 862440348, + "step": 8895 + }, + { + "epoch": 0.8697692608525616, + "grad_norm": 8.168310782768161, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 862539056, + "step": 8896 + }, + { + "epoch": 0.8697692608525616, + "loss": 0.06265969574451447, + "loss_ce": 0.005530789028853178, + "loss_iou": 0.3046875, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 862539056, + "step": 8896 + }, + { + "epoch": 0.8698670316777474, + "grad_norm": 5.484499513291598, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 862635484, + "step": 8897 + }, + { + "epoch": 0.8698670316777474, + "loss": 0.051312174648046494, + "loss_ce": 0.004994121380150318, + "loss_iou": 0.29296875, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 862635484, + "step": 8897 + }, + { + "epoch": 0.8699648025029332, + "grad_norm": 5.421113308178453, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 862732696, + "step": 8898 + }, + { + "epoch": 0.8699648025029332, + "loss": 0.07741734385490417, + "loss_ce": 0.0030917853582650423, + "loss_iou": 0.365234375, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 862732696, + "step": 8898 + }, + { + "epoch": 0.8700625733281189, + "grad_norm": 7.311344572552634, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 862829088, + "step": 8899 + }, + { + "epoch": 0.8700625733281189, + "loss": 0.10000180453062057, + "loss_ce": 0.009730807505548, + "loss_iou": 0.23046875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 862829088, + "step": 8899 + }, + { + "epoch": 0.8701603441533047, + "grad_norm": 8.042573660902583, + "learning_rate": 5e-05, + "loss": 0.1022, + "num_input_tokens_seen": 862926388, + "step": 8900 + }, + { + "epoch": 0.8701603441533047, + "loss": 0.06427162885665894, + "loss_ce": 0.005075159482657909, + "loss_iou": 0.208984375, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 862926388, + "step": 8900 + }, + { + "epoch": 0.8702581149784904, + "grad_norm": 9.725670474763197, + "learning_rate": 5e-05, + "loss": 0.0763, + "num_input_tokens_seen": 863023908, + "step": 8901 + }, + { + "epoch": 0.8702581149784904, + "loss": 0.05926187336444855, + "loss_ce": 0.007984710857272148, + "loss_iou": 0.265625, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 863023908, + "step": 8901 + }, + { + "epoch": 0.8703558858036762, + "grad_norm": 27.883448244188475, + "learning_rate": 5e-05, + "loss": 0.1139, + "num_input_tokens_seen": 863120068, + "step": 8902 + }, + { + "epoch": 0.8703558858036762, + "loss": 0.08120624721050262, + "loss_ce": 0.006613660603761673, + "loss_iou": 0.2451171875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 863120068, + "step": 8902 + }, + { + "epoch": 0.870453656628862, + "grad_norm": 11.522607740890932, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 863216676, + "step": 8903 + }, + { + "epoch": 0.870453656628862, + "loss": 0.06025528907775879, + "loss_ce": 0.006620646920055151, + "loss_iou": 0.240234375, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 863216676, + "step": 8903 + }, + { + "epoch": 0.8705514274540477, + "grad_norm": 8.383925906134266, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 863313492, + "step": 8904 + }, + { + "epoch": 0.8705514274540477, + "loss": 0.035728827118873596, + "loss_ce": 0.005012886598706245, + "loss_iou": 0.2353515625, + "loss_num": 0.006134033203125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 863313492, + "step": 8904 + }, + { + "epoch": 0.8706491982792335, + "grad_norm": 4.755776151185718, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 863409928, + "step": 8905 + }, + { + "epoch": 0.8706491982792335, + "loss": 0.08582468330860138, + "loss_ce": 0.006433201488107443, + "loss_iou": 0.345703125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 863409928, + "step": 8905 + }, + { + "epoch": 0.8707469691044193, + "grad_norm": 22.203936882682953, + "learning_rate": 5e-05, + "loss": 0.0633, + "num_input_tokens_seen": 863507156, + "step": 8906 + }, + { + "epoch": 0.8707469691044193, + "loss": 0.05967789888381958, + "loss_ce": 0.004189314320683479, + "loss_iou": 0.30078125, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 863507156, + "step": 8906 + }, + { + "epoch": 0.870844739929605, + "grad_norm": 3.209968199319602, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 863604516, + "step": 8907 + }, + { + "epoch": 0.870844739929605, + "loss": 0.06566964089870453, + "loss_ce": 0.008357631042599678, + "loss_iou": 0.234375, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 863604516, + "step": 8907 + }, + { + "epoch": 0.8709425107547908, + "grad_norm": 20.75872266510712, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 863701100, + "step": 8908 + }, + { + "epoch": 0.8709425107547908, + "loss": 0.05922490730881691, + "loss_ce": 0.0038431291468441486, + "loss_iou": 0.1728515625, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 863701100, + "step": 8908 + }, + { + "epoch": 0.8710402815799765, + "grad_norm": 7.145969573627416, + "learning_rate": 5e-05, + "loss": 0.1032, + "num_input_tokens_seen": 863797632, + "step": 8909 + }, + { + "epoch": 0.8710402815799765, + "loss": 0.11592933535575867, + "loss_ce": 0.005913468077778816, + "loss_iou": 0.20703125, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 863797632, + "step": 8909 + }, + { + "epoch": 0.8711380524051623, + "grad_norm": 10.892746665490543, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 863894680, + "step": 8910 + }, + { + "epoch": 0.8711380524051623, + "loss": 0.11662761867046356, + "loss_ce": 0.012501634657382965, + "loss_iou": 0.2578125, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 863894680, + "step": 8910 + }, + { + "epoch": 0.8712358232303481, + "grad_norm": 12.955650413194107, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 863992480, + "step": 8911 + }, + { + "epoch": 0.8712358232303481, + "loss": 0.04855239391326904, + "loss_ce": 0.004591825418174267, + "loss_iou": 0.369140625, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 863992480, + "step": 8911 + }, + { + "epoch": 0.8713335940555338, + "grad_norm": 14.803614350486647, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 864089472, + "step": 8912 + }, + { + "epoch": 0.8713335940555338, + "loss": 0.08073568344116211, + "loss_ce": 0.005952355917543173, + "loss_iou": 0.32421875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 864089472, + "step": 8912 + }, + { + "epoch": 0.8714313648807196, + "grad_norm": 17.1689063783244, + "learning_rate": 5e-05, + "loss": 0.0621, + "num_input_tokens_seen": 864186800, + "step": 8913 + }, + { + "epoch": 0.8714313648807196, + "loss": 0.04154977947473526, + "loss_ce": 0.00812540203332901, + "loss_iou": 0.2197265625, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 864186800, + "step": 8913 + }, + { + "epoch": 0.8715291357059054, + "grad_norm": 9.529677007949216, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 864284124, + "step": 8914 + }, + { + "epoch": 0.8715291357059054, + "loss": 0.05779143422842026, + "loss_ce": 0.0036074735689908266, + "loss_iou": 0.34765625, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 864284124, + "step": 8914 + }, + { + "epoch": 0.8716269065310911, + "grad_norm": 4.5885125515932, + "learning_rate": 5e-05, + "loss": 0.0493, + "num_input_tokens_seen": 864381140, + "step": 8915 + }, + { + "epoch": 0.8716269065310911, + "loss": 0.050604499876499176, + "loss_ce": 0.006003058515489101, + "loss_iou": 0.2412109375, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 864381140, + "step": 8915 + }, + { + "epoch": 0.8717246773562769, + "grad_norm": 6.50996887345527, + "learning_rate": 5e-05, + "loss": 0.099, + "num_input_tokens_seen": 864478564, + "step": 8916 + }, + { + "epoch": 0.8717246773562769, + "loss": 0.08190310001373291, + "loss_ce": 0.010553007014095783, + "loss_iou": 0.287109375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 864478564, + "step": 8916 + }, + { + "epoch": 0.8718224481814627, + "grad_norm": 19.500167590259824, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 864574888, + "step": 8917 + }, + { + "epoch": 0.8718224481814627, + "loss": 0.09019852429628372, + "loss_ce": 0.004993447568267584, + "loss_iou": 0.2177734375, + "loss_num": 0.01708984375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 864574888, + "step": 8917 + }, + { + "epoch": 0.8719202190066484, + "grad_norm": 9.572012471880658, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 864672792, + "step": 8918 + }, + { + "epoch": 0.8719202190066484, + "loss": 0.06499700248241425, + "loss_ce": 0.00496129784733057, + "loss_iou": 0.3125, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 864672792, + "step": 8918 + }, + { + "epoch": 0.8720179898318342, + "grad_norm": 10.071591266372836, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 864769280, + "step": 8919 + }, + { + "epoch": 0.8720179898318342, + "loss": 0.08047837018966675, + "loss_ce": 0.007419287227094173, + "loss_iou": 0.314453125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 864769280, + "step": 8919 + }, + { + "epoch": 0.8721157606570199, + "grad_norm": 7.16956695707181, + "learning_rate": 5e-05, + "loss": 0.0875, + "num_input_tokens_seen": 864867284, + "step": 8920 + }, + { + "epoch": 0.8721157606570199, + "loss": 0.09533055126667023, + "loss_ce": 0.004060102626681328, + "loss_iou": 0.2470703125, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 864867284, + "step": 8920 + }, + { + "epoch": 0.8722135314822057, + "grad_norm": 11.72597757731487, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 864964020, + "step": 8921 + }, + { + "epoch": 0.8722135314822057, + "loss": 0.08388987183570862, + "loss_ce": 0.005421551875770092, + "loss_iou": 0.291015625, + "loss_num": 0.015625, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 864964020, + "step": 8921 + }, + { + "epoch": 0.8723113023073915, + "grad_norm": 3.218050373702503, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 865060380, + "step": 8922 + }, + { + "epoch": 0.8723113023073915, + "loss": 0.08169084787368774, + "loss_ce": 0.007563648279756308, + "loss_iou": 0.236328125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 865060380, + "step": 8922 + }, + { + "epoch": 0.8724090731325772, + "grad_norm": 67.81295622192295, + "learning_rate": 5e-05, + "loss": 0.0846, + "num_input_tokens_seen": 865157564, + "step": 8923 + }, + { + "epoch": 0.8724090731325772, + "loss": 0.0666908323764801, + "loss_ce": 0.006113443057984114, + "loss_iou": 0.2236328125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 865157564, + "step": 8923 + }, + { + "epoch": 0.872506843957763, + "grad_norm": 19.951790580457736, + "learning_rate": 5e-05, + "loss": 0.1104, + "num_input_tokens_seen": 865254976, + "step": 8924 + }, + { + "epoch": 0.872506843957763, + "loss": 0.13518735766410828, + "loss_ce": 0.007837506011128426, + "loss_iou": 0.361328125, + "loss_num": 0.0255126953125, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 865254976, + "step": 8924 + }, + { + "epoch": 0.8726046147829488, + "grad_norm": 4.706842623806513, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 865351592, + "step": 8925 + }, + { + "epoch": 0.8726046147829488, + "loss": 0.06109163165092468, + "loss_ce": 0.002185078337788582, + "loss_iou": 0.326171875, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 865351592, + "step": 8925 + }, + { + "epoch": 0.8727023856081345, + "grad_norm": 6.7988041780962, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 865448852, + "step": 8926 + }, + { + "epoch": 0.8727023856081345, + "loss": 0.06879207491874695, + "loss_ce": 0.004522052593529224, + "loss_iou": 0.28125, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 865448852, + "step": 8926 + }, + { + "epoch": 0.8728001564333203, + "grad_norm": 14.78500935596329, + "learning_rate": 5e-05, + "loss": 0.073, + "num_input_tokens_seen": 865545636, + "step": 8927 + }, + { + "epoch": 0.8728001564333203, + "loss": 0.048783980309963226, + "loss_ce": 0.0065896171145141125, + "loss_iou": 0.1826171875, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 865545636, + "step": 8927 + }, + { + "epoch": 0.872897927258506, + "grad_norm": 22.040636264763847, + "learning_rate": 5e-05, + "loss": 0.0847, + "num_input_tokens_seen": 865643020, + "step": 8928 + }, + { + "epoch": 0.872897927258506, + "loss": 0.07955043762922287, + "loss_ce": 0.003592181019484997, + "loss_iou": 0.28125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 865643020, + "step": 8928 + }, + { + "epoch": 0.8729956980836918, + "grad_norm": 7.447755325516118, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 865740704, + "step": 8929 + }, + { + "epoch": 0.8729956980836918, + "loss": 0.10423624515533447, + "loss_ce": 0.004733691923320293, + "loss_iou": 0.251953125, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 865740704, + "step": 8929 + }, + { + "epoch": 0.8730934689088776, + "grad_norm": 12.473378916064506, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 865839060, + "step": 8930 + }, + { + "epoch": 0.8730934689088776, + "loss": 0.09857600927352905, + "loss_ce": 0.00355952768586576, + "loss_iou": 0.314453125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 865839060, + "step": 8930 + }, + { + "epoch": 0.8731912397340633, + "grad_norm": 13.790634787349704, + "learning_rate": 5e-05, + "loss": 0.0745, + "num_input_tokens_seen": 865935444, + "step": 8931 + }, + { + "epoch": 0.8731912397340633, + "loss": 0.08631788194179535, + "loss_ce": 0.004576549399644136, + "loss_iou": 0.2158203125, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 865935444, + "step": 8931 + }, + { + "epoch": 0.8732890105592491, + "grad_norm": 5.899289933454416, + "learning_rate": 5e-05, + "loss": 0.078, + "num_input_tokens_seen": 866033684, + "step": 8932 + }, + { + "epoch": 0.8732890105592491, + "loss": 0.07861560583114624, + "loss_ce": 0.0029701548628509045, + "loss_iou": 0.28125, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 866033684, + "step": 8932 + }, + { + "epoch": 0.8733867813844349, + "grad_norm": 7.138353674625826, + "learning_rate": 5e-05, + "loss": 0.062, + "num_input_tokens_seen": 866130716, + "step": 8933 + }, + { + "epoch": 0.8733867813844349, + "loss": 0.04945267736911774, + "loss_ce": 0.0034169091377407312, + "loss_iou": 0.255859375, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 866130716, + "step": 8933 + }, + { + "epoch": 0.8734845522096206, + "grad_norm": 21.706907549531394, + "learning_rate": 5e-05, + "loss": 0.104, + "num_input_tokens_seen": 866226620, + "step": 8934 + }, + { + "epoch": 0.8734845522096206, + "loss": 0.05792864412069321, + "loss_ce": 0.004828056786209345, + "loss_iou": 0.29296875, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 866226620, + "step": 8934 + }, + { + "epoch": 0.8735823230348064, + "grad_norm": 10.048022778969342, + "learning_rate": 5e-05, + "loss": 0.062, + "num_input_tokens_seen": 866324392, + "step": 8935 + }, + { + "epoch": 0.8735823230348064, + "loss": 0.06363776326179504, + "loss_ce": 0.00484564621001482, + "loss_iou": 0.28125, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 866324392, + "step": 8935 + }, + { + "epoch": 0.8736800938599921, + "grad_norm": 7.409941245815784, + "learning_rate": 5e-05, + "loss": 0.0601, + "num_input_tokens_seen": 866421688, + "step": 8936 + }, + { + "epoch": 0.8736800938599921, + "loss": 0.0456903874874115, + "loss_ce": 0.004323810338973999, + "loss_iou": 0.333984375, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 866421688, + "step": 8936 + }, + { + "epoch": 0.8737778646851779, + "grad_norm": 15.863712150848334, + "learning_rate": 5e-05, + "loss": 0.1022, + "num_input_tokens_seen": 866518224, + "step": 8937 + }, + { + "epoch": 0.8737778646851779, + "loss": 0.09742341190576553, + "loss_ce": 0.004863598383963108, + "loss_iou": 0.294921875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 866518224, + "step": 8937 + }, + { + "epoch": 0.8738756355103637, + "grad_norm": 2.4612812462074998, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 866615576, + "step": 8938 + }, + { + "epoch": 0.8738756355103637, + "loss": 0.07467678189277649, + "loss_ce": 0.006515773944556713, + "loss_iou": 0.291015625, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 866615576, + "step": 8938 + }, + { + "epoch": 0.8739734063355494, + "grad_norm": 6.76880447183192, + "learning_rate": 5e-05, + "loss": 0.0619, + "num_input_tokens_seen": 866711864, + "step": 8939 + }, + { + "epoch": 0.8739734063355494, + "loss": 0.056873470544815063, + "loss_ce": 0.008342892862856388, + "loss_iou": 0.3046875, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 866711864, + "step": 8939 + }, + { + "epoch": 0.8740711771607352, + "grad_norm": 2.362514411952128, + "learning_rate": 5e-05, + "loss": 0.0621, + "num_input_tokens_seen": 866809188, + "step": 8940 + }, + { + "epoch": 0.8740711771607352, + "loss": 0.04745835065841675, + "loss_ce": 0.008029643446207047, + "loss_iou": 0.27734375, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 866809188, + "step": 8940 + }, + { + "epoch": 0.874168947985921, + "grad_norm": 4.300278587243343, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 866905188, + "step": 8941 + }, + { + "epoch": 0.874168947985921, + "loss": 0.06124067306518555, + "loss_ce": 0.004485605750232935, + "loss_iou": 0.2197265625, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 866905188, + "step": 8941 + }, + { + "epoch": 0.8742667188111067, + "grad_norm": 14.571925381596243, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 867002320, + "step": 8942 + }, + { + "epoch": 0.8742667188111067, + "loss": 0.08488351106643677, + "loss_ce": 0.01073343027383089, + "loss_iou": 0.1904296875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 867002320, + "step": 8942 + }, + { + "epoch": 0.8743644896362925, + "grad_norm": 5.100549117671254, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 867099696, + "step": 8943 + }, + { + "epoch": 0.8743644896362925, + "loss": 0.06280811876058578, + "loss_ce": 0.005839431658387184, + "loss_iou": 0.2578125, + "loss_num": 0.01141357421875, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 867099696, + "step": 8943 + }, + { + "epoch": 0.8744622604614783, + "grad_norm": 11.16663817108047, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 867196320, + "step": 8944 + }, + { + "epoch": 0.8744622604614783, + "loss": 0.06375755369663239, + "loss_ce": 0.004843373317271471, + "loss_iou": 0.2734375, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 867196320, + "step": 8944 + }, + { + "epoch": 0.874560031286664, + "grad_norm": 15.581486298140728, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 867293876, + "step": 8945 + }, + { + "epoch": 0.874560031286664, + "loss": 0.07984303683042526, + "loss_ce": 0.0064406320452690125, + "loss_iou": 0.251953125, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 867293876, + "step": 8945 + }, + { + "epoch": 0.8746578021118498, + "grad_norm": 35.513637765830396, + "learning_rate": 5e-05, + "loss": 0.1001, + "num_input_tokens_seen": 867390488, + "step": 8946 + }, + { + "epoch": 0.8746578021118498, + "loss": 0.10960866510868073, + "loss_ce": 0.008046159520745277, + "loss_iou": 0.271484375, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 867390488, + "step": 8946 + }, + { + "epoch": 0.8747555729370355, + "grad_norm": 31.679380500557606, + "learning_rate": 5e-05, + "loss": 0.0931, + "num_input_tokens_seen": 867487036, + "step": 8947 + }, + { + "epoch": 0.8747555729370355, + "loss": 0.047127120196819305, + "loss_ce": 0.0026935250498354435, + "loss_iou": 0.279296875, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 867487036, + "step": 8947 + }, + { + "epoch": 0.8748533437622213, + "grad_norm": 18.19766762146318, + "learning_rate": 5e-05, + "loss": 0.0974, + "num_input_tokens_seen": 867583380, + "step": 8948 + }, + { + "epoch": 0.8748533437622213, + "loss": 0.13580861687660217, + "loss_ce": 0.010663654655218124, + "loss_iou": 0.208984375, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 867583380, + "step": 8948 + }, + { + "epoch": 0.8749511145874072, + "grad_norm": 23.3212923129498, + "learning_rate": 5e-05, + "loss": 0.1115, + "num_input_tokens_seen": 867680756, + "step": 8949 + }, + { + "epoch": 0.8749511145874072, + "loss": 0.07847420126199722, + "loss_ce": 0.0037976866587996483, + "loss_iou": 0.361328125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 867680756, + "step": 8949 + }, + { + "epoch": 0.8750488854125928, + "grad_norm": 6.565632135106582, + "learning_rate": 5e-05, + "loss": 0.064, + "num_input_tokens_seen": 867777336, + "step": 8950 + }, + { + "epoch": 0.8750488854125928, + "loss": 0.05736904591321945, + "loss_ce": 0.004314238205552101, + "loss_iou": 0.2333984375, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 867777336, + "step": 8950 + }, + { + "epoch": 0.8751466562377787, + "grad_norm": 6.355102199535166, + "learning_rate": 5e-05, + "loss": 0.1013, + "num_input_tokens_seen": 867874496, + "step": 8951 + }, + { + "epoch": 0.8751466562377787, + "loss": 0.11203768104314804, + "loss_ce": 0.006629966665059328, + "loss_iou": 0.291015625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 867874496, + "step": 8951 + }, + { + "epoch": 0.8752444270629645, + "grad_norm": 6.677752026963735, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 867971376, + "step": 8952 + }, + { + "epoch": 0.8752444270629645, + "loss": 0.07021801173686981, + "loss_ce": 0.00391857186332345, + "loss_iou": 0.32421875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 867971376, + "step": 8952 + }, + { + "epoch": 0.8753421978881502, + "grad_norm": 4.1192123033433745, + "learning_rate": 5e-05, + "loss": 0.0594, + "num_input_tokens_seen": 868069032, + "step": 8953 + }, + { + "epoch": 0.8753421978881502, + "loss": 0.050756677985191345, + "loss_ce": 0.003195035271346569, + "loss_iou": 0.2578125, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 868069032, + "step": 8953 + }, + { + "epoch": 0.875439968713336, + "grad_norm": 11.638918682555818, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 868165676, + "step": 8954 + }, + { + "epoch": 0.875439968713336, + "loss": 0.0624399408698082, + "loss_ce": 0.004128477536141872, + "loss_iou": 0.3203125, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 868165676, + "step": 8954 + }, + { + "epoch": 0.8755377395385217, + "grad_norm": 19.915681088814846, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 868262888, + "step": 8955 + }, + { + "epoch": 0.8755377395385217, + "loss": 0.08622478693723679, + "loss_ce": 0.005444757640361786, + "loss_iou": 0.27734375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 868262888, + "step": 8955 + }, + { + "epoch": 0.8756355103637075, + "grad_norm": 8.840778631866028, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 868359116, + "step": 8956 + }, + { + "epoch": 0.8756355103637075, + "loss": 0.07039836794137955, + "loss_ce": 0.005029711872339249, + "loss_iou": 0.31640625, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 868359116, + "step": 8956 + }, + { + "epoch": 0.8757332811888933, + "grad_norm": 5.86772962657075, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 868456144, + "step": 8957 + }, + { + "epoch": 0.8757332811888933, + "loss": 0.07893362641334534, + "loss_ce": 0.008602054789662361, + "loss_iou": 0.234375, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 868456144, + "step": 8957 + }, + { + "epoch": 0.875831052014079, + "grad_norm": 16.987751013641784, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 868552316, + "step": 8958 + }, + { + "epoch": 0.875831052014079, + "loss": 0.12905952334403992, + "loss_ce": 0.0059210993349552155, + "loss_iou": 0.4375, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 868552316, + "step": 8958 + }, + { + "epoch": 0.8759288228392648, + "grad_norm": 11.507218369347015, + "learning_rate": 5e-05, + "loss": 0.0542, + "num_input_tokens_seen": 868649460, + "step": 8959 + }, + { + "epoch": 0.8759288228392648, + "loss": 0.04482843726873398, + "loss_ce": 0.004865668714046478, + "loss_iou": 0.345703125, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 868649460, + "step": 8959 + }, + { + "epoch": 0.8760265936644506, + "grad_norm": 12.837702145145391, + "learning_rate": 5e-05, + "loss": 0.0645, + "num_input_tokens_seen": 868746904, + "step": 8960 + }, + { + "epoch": 0.8760265936644506, + "loss": 0.065827377140522, + "loss_ce": 0.003968249075114727, + "loss_iou": 0.27734375, + "loss_num": 0.01239013671875, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 868746904, + "step": 8960 + }, + { + "epoch": 0.8761243644896363, + "grad_norm": 17.368605301822658, + "learning_rate": 5e-05, + "loss": 0.0754, + "num_input_tokens_seen": 868843384, + "step": 8961 + }, + { + "epoch": 0.8761243644896363, + "loss": 0.06774772703647614, + "loss_ce": 0.007979050278663635, + "loss_iou": 0.3359375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 868843384, + "step": 8961 + }, + { + "epoch": 0.8762221353148221, + "grad_norm": 11.789187034899259, + "learning_rate": 5e-05, + "loss": 0.1073, + "num_input_tokens_seen": 868939584, + "step": 8962 + }, + { + "epoch": 0.8762221353148221, + "loss": 0.08589296042919159, + "loss_ce": 0.008515648543834686, + "loss_iou": 0.27734375, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 868939584, + "step": 8962 + }, + { + "epoch": 0.8763199061400079, + "grad_norm": 19.111008570355946, + "learning_rate": 5e-05, + "loss": 0.088, + "num_input_tokens_seen": 869036676, + "step": 8963 + }, + { + "epoch": 0.8763199061400079, + "loss": 0.08132383227348328, + "loss_ce": 0.004709457978606224, + "loss_iou": 0.294921875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 869036676, + "step": 8963 + }, + { + "epoch": 0.8764176769651936, + "grad_norm": 19.889379748614996, + "learning_rate": 5e-05, + "loss": 0.0718, + "num_input_tokens_seen": 869133180, + "step": 8964 + }, + { + "epoch": 0.8764176769651936, + "loss": 0.09344495832920074, + "loss_ce": 0.011276381090283394, + "loss_iou": 0.271484375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 869133180, + "step": 8964 + }, + { + "epoch": 0.8765154477903794, + "grad_norm": 10.720977827412982, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 869229968, + "step": 8965 + }, + { + "epoch": 0.8765154477903794, + "loss": 0.10119004547595978, + "loss_ce": 0.006371928378939629, + "loss_iou": 0.30078125, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 869229968, + "step": 8965 + }, + { + "epoch": 0.8766132186155651, + "grad_norm": 2.6803147504378115, + "learning_rate": 5e-05, + "loss": 0.0529, + "num_input_tokens_seen": 869327320, + "step": 8966 + }, + { + "epoch": 0.8766132186155651, + "loss": 0.07771649956703186, + "loss_ce": 0.004443794954568148, + "loss_iou": 0.34765625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 869327320, + "step": 8966 + }, + { + "epoch": 0.8767109894407509, + "grad_norm": 12.406498723994886, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 869424436, + "step": 8967 + }, + { + "epoch": 0.8767109894407509, + "loss": 0.09204071760177612, + "loss_ce": 0.008254703134298325, + "loss_iou": 0.28515625, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 869424436, + "step": 8967 + }, + { + "epoch": 0.8768087602659367, + "grad_norm": 13.224524859967913, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 869521456, + "step": 8968 + }, + { + "epoch": 0.8768087602659367, + "loss": 0.08355946093797684, + "loss_ce": 0.005098770372569561, + "loss_iou": 0.32421875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 869521456, + "step": 8968 + }, + { + "epoch": 0.8769065310911224, + "grad_norm": 15.563915411993097, + "learning_rate": 5e-05, + "loss": 0.083, + "num_input_tokens_seen": 869618800, + "step": 8969 + }, + { + "epoch": 0.8769065310911224, + "loss": 0.060407448559999466, + "loss_ce": 0.004529764875769615, + "loss_iou": 0.2431640625, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 869618800, + "step": 8969 + }, + { + "epoch": 0.8770043019163082, + "grad_norm": 5.14841274197479, + "learning_rate": 5e-05, + "loss": 0.0775, + "num_input_tokens_seen": 869716032, + "step": 8970 + }, + { + "epoch": 0.8770043019163082, + "loss": 0.10099107027053833, + "loss_ce": 0.0030525291804224253, + "loss_iou": 0.3203125, + "loss_num": 0.01953125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 869716032, + "step": 8970 + }, + { + "epoch": 0.877102072741494, + "grad_norm": 9.541241982530018, + "learning_rate": 5e-05, + "loss": 0.0554, + "num_input_tokens_seen": 869814192, + "step": 8971 + }, + { + "epoch": 0.877102072741494, + "loss": 0.050597503781318665, + "loss_ce": 0.0029595629312098026, + "loss_iou": 0.333984375, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 869814192, + "step": 8971 + }, + { + "epoch": 0.8771998435666797, + "grad_norm": 12.47816652938392, + "learning_rate": 5e-05, + "loss": 0.1023, + "num_input_tokens_seen": 869911784, + "step": 8972 + }, + { + "epoch": 0.8771998435666797, + "loss": 0.10628305375576019, + "loss_ce": 0.006002296693623066, + "loss_iou": 0.3046875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 869911784, + "step": 8972 + }, + { + "epoch": 0.8772976143918655, + "grad_norm": 10.257719376742722, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 870008000, + "step": 8973 + }, + { + "epoch": 0.8772976143918655, + "loss": 0.06568709015846252, + "loss_ce": 0.0054835365153849125, + "loss_iou": 0.2138671875, + "loss_num": 0.01202392578125, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 870008000, + "step": 8973 + }, + { + "epoch": 0.8773953852170512, + "grad_norm": 8.080414043043357, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 870105264, + "step": 8974 + }, + { + "epoch": 0.8773953852170512, + "loss": 0.06980066746473312, + "loss_ce": 0.004172616638243198, + "loss_iou": 0.306640625, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 870105264, + "step": 8974 + }, + { + "epoch": 0.877493156042237, + "grad_norm": 3.493566387192789, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 870202088, + "step": 8975 + }, + { + "epoch": 0.877493156042237, + "loss": 0.11623350530862808, + "loss_ce": 0.005958236288279295, + "loss_iou": 0.265625, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 870202088, + "step": 8975 + }, + { + "epoch": 0.8775909268674228, + "grad_norm": 10.033687263701298, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 870299232, + "step": 8976 + }, + { + "epoch": 0.8775909268674228, + "loss": 0.07777318358421326, + "loss_ce": 0.006499381270259619, + "loss_iou": 0.3046875, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 870299232, + "step": 8976 + }, + { + "epoch": 0.8776886976926085, + "grad_norm": 6.836455587989225, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 870396252, + "step": 8977 + }, + { + "epoch": 0.8776886976926085, + "loss": 0.05960126966238022, + "loss_ce": 0.003426036797463894, + "loss_iou": 0.255859375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 870396252, + "step": 8977 + }, + { + "epoch": 0.8777864685177943, + "grad_norm": 4.155815303414474, + "learning_rate": 5e-05, + "loss": 0.0466, + "num_input_tokens_seen": 870492528, + "step": 8978 + }, + { + "epoch": 0.8777864685177943, + "loss": 0.0392097570002079, + "loss_ce": 0.002329261740669608, + "loss_iou": 0.28125, + "loss_num": 0.007354736328125, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 870492528, + "step": 8978 + }, + { + "epoch": 0.8778842393429801, + "grad_norm": 8.763852861282889, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 870589960, + "step": 8979 + }, + { + "epoch": 0.8778842393429801, + "loss": 0.07533514499664307, + "loss_ce": 0.003252623835578561, + "loss_iou": 0.279296875, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 870589960, + "step": 8979 + }, + { + "epoch": 0.8779820101681658, + "grad_norm": 22.725245285541206, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 870687736, + "step": 8980 + }, + { + "epoch": 0.8779820101681658, + "loss": 0.04527398943901062, + "loss_ce": 0.0037700803950428963, + "loss_iou": 0.318359375, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 870687736, + "step": 8980 + }, + { + "epoch": 0.8780797809933516, + "grad_norm": 23.815920996825213, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 870784716, + "step": 8981 + }, + { + "epoch": 0.8780797809933516, + "loss": 0.05550220236182213, + "loss_ce": 0.005316046066582203, + "loss_iou": 0.28125, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 870784716, + "step": 8981 + }, + { + "epoch": 0.8781775518185373, + "grad_norm": 5.309071240191655, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 870881992, + "step": 8982 + }, + { + "epoch": 0.8781775518185373, + "loss": 0.04925111308693886, + "loss_ce": 0.009830028750002384, + "loss_iou": 0.294921875, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 870881992, + "step": 8982 + }, + { + "epoch": 0.8782753226437231, + "grad_norm": 5.391428035548657, + "learning_rate": 5e-05, + "loss": 0.0537, + "num_input_tokens_seen": 870979272, + "step": 8983 + }, + { + "epoch": 0.8782753226437231, + "loss": 0.03641270101070404, + "loss_ce": 0.0016760691069066525, + "loss_iou": 0.306640625, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 870979272, + "step": 8983 + }, + { + "epoch": 0.8783730934689089, + "grad_norm": 3.2288405951300856, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 871076888, + "step": 8984 + }, + { + "epoch": 0.8783730934689089, + "loss": 0.06456588208675385, + "loss_ce": 0.004881125874817371, + "loss_iou": 0.380859375, + "loss_num": 0.011962890625, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 871076888, + "step": 8984 + }, + { + "epoch": 0.8784708642940946, + "grad_norm": 9.565468832473819, + "learning_rate": 5e-05, + "loss": 0.0745, + "num_input_tokens_seen": 871174112, + "step": 8985 + }, + { + "epoch": 0.8784708642940946, + "loss": 0.08112726360559464, + "loss_ce": 0.0019036318408325315, + "loss_iou": 0.333984375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 871174112, + "step": 8985 + }, + { + "epoch": 0.8785686351192804, + "grad_norm": 3.2639370942689037, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 871270828, + "step": 8986 + }, + { + "epoch": 0.8785686351192804, + "loss": 0.08242658525705338, + "loss_ce": 0.005583322141319513, + "loss_iou": 0.32421875, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 871270828, + "step": 8986 + }, + { + "epoch": 0.8786664059444662, + "grad_norm": 16.418505480841517, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 871367132, + "step": 8987 + }, + { + "epoch": 0.8786664059444662, + "loss": 0.09771265089511871, + "loss_ce": 0.00805200170725584, + "loss_iou": 0.22265625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 871367132, + "step": 8987 + }, + { + "epoch": 0.8787641767696519, + "grad_norm": 8.95411863536741, + "learning_rate": 5e-05, + "loss": 0.0917, + "num_input_tokens_seen": 871464456, + "step": 8988 + }, + { + "epoch": 0.8787641767696519, + "loss": 0.09461235255002975, + "loss_ce": 0.00409722002223134, + "loss_iou": 0.298828125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 871464456, + "step": 8988 + }, + { + "epoch": 0.8788619475948377, + "grad_norm": 7.325016860929355, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 871560532, + "step": 8989 + }, + { + "epoch": 0.8788619475948377, + "loss": 0.07133875787258148, + "loss_ce": 0.005199534818530083, + "loss_iou": 0.244140625, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 871560532, + "step": 8989 + }, + { + "epoch": 0.8789597184200235, + "grad_norm": 26.838497840854927, + "learning_rate": 5e-05, + "loss": 0.122, + "num_input_tokens_seen": 871658728, + "step": 8990 + }, + { + "epoch": 0.8789597184200235, + "loss": 0.10280023515224457, + "loss_ce": 0.008363588713109493, + "loss_iou": 0.36328125, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 871658728, + "step": 8990 + }, + { + "epoch": 0.8790574892452092, + "grad_norm": 6.90258376916714, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 871755452, + "step": 8991 + }, + { + "epoch": 0.8790574892452092, + "loss": 0.08992060273885727, + "loss_ce": 0.0064702872186899185, + "loss_iou": 0.279296875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 871755452, + "step": 8991 + }, + { + "epoch": 0.879155260070395, + "grad_norm": 6.283830694515389, + "learning_rate": 5e-05, + "loss": 0.0545, + "num_input_tokens_seen": 871852280, + "step": 8992 + }, + { + "epoch": 0.879155260070395, + "loss": 0.06535956263542175, + "loss_ce": 0.010641541332006454, + "loss_iou": 0.25390625, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 871852280, + "step": 8992 + }, + { + "epoch": 0.8792530308955807, + "grad_norm": 3.6894158632590646, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 871950000, + "step": 8993 + }, + { + "epoch": 0.8792530308955807, + "loss": 0.04484376311302185, + "loss_ce": 0.002832502592355013, + "loss_iou": 0.2578125, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 871950000, + "step": 8993 + }, + { + "epoch": 0.8793508017207665, + "grad_norm": 9.073731064264809, + "learning_rate": 5e-05, + "loss": 0.0537, + "num_input_tokens_seen": 872046252, + "step": 8994 + }, + { + "epoch": 0.8793508017207665, + "loss": 0.04665004462003708, + "loss_ce": 0.009426230564713478, + "loss_iou": 0.232421875, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 872046252, + "step": 8994 + }, + { + "epoch": 0.8794485725459523, + "grad_norm": 5.331925686872095, + "learning_rate": 5e-05, + "loss": 0.0974, + "num_input_tokens_seen": 872143000, + "step": 8995 + }, + { + "epoch": 0.8794485725459523, + "loss": 0.11747179925441742, + "loss_ce": 0.008089166134595871, + "loss_iou": 0.2109375, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 872143000, + "step": 8995 + }, + { + "epoch": 0.879546343371138, + "grad_norm": 6.7476705719377135, + "learning_rate": 5e-05, + "loss": 0.0582, + "num_input_tokens_seen": 872239720, + "step": 8996 + }, + { + "epoch": 0.879546343371138, + "loss": 0.05671219527721405, + "loss_ce": 0.005686803720891476, + "loss_iou": 0.306640625, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 872239720, + "step": 8996 + }, + { + "epoch": 0.8796441141963238, + "grad_norm": 3.9484317434900773, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 872336120, + "step": 8997 + }, + { + "epoch": 0.8796441141963238, + "loss": 0.03722181171178818, + "loss_ce": 0.0074404701590538025, + "loss_iou": 0.26171875, + "loss_num": 0.005950927734375, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 872336120, + "step": 8997 + }, + { + "epoch": 0.8797418850215096, + "grad_norm": 4.421844180753744, + "learning_rate": 5e-05, + "loss": 0.0502, + "num_input_tokens_seen": 872433468, + "step": 8998 + }, + { + "epoch": 0.8797418850215096, + "loss": 0.07379327714443207, + "loss_ce": 0.004327640403062105, + "loss_iou": 0.291015625, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 872433468, + "step": 8998 + }, + { + "epoch": 0.8798396558466953, + "grad_norm": 27.25180889587639, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 872531108, + "step": 8999 + }, + { + "epoch": 0.8798396558466953, + "loss": 0.09056903421878815, + "loss_ce": 0.004463684745132923, + "loss_iou": 0.337890625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 872531108, + "step": 8999 + }, + { + "epoch": 0.8799374266718811, + "grad_norm": 57.20579076792825, + "learning_rate": 5e-05, + "loss": 0.1087, + "num_input_tokens_seen": 872628068, + "step": 9000 + }, + { + "epoch": 0.8799374266718811, + "eval_seeclick_CIoU": 0.5797188878059387, + "eval_seeclick_GIoU": 0.5832484364509583, + "eval_seeclick_IoU": 0.6128273010253906, + "eval_seeclick_MAE_all": 0.06733812764286995, + "eval_seeclick_MAE_h": 0.0430764053016901, + "eval_seeclick_MAE_w": 0.09413786977529526, + "eval_seeclick_MAE_x": 0.08628085628151894, + "eval_seeclick_MAE_y": 0.04585738107562065, + "eval_seeclick_NUM_probability": 0.9999985098838806, + "eval_seeclick_inside_bbox": 0.8153409063816071, + "eval_seeclick_loss": 0.24316728115081787, + "eval_seeclick_loss_ce": 0.00987795228138566, + "eval_seeclick_loss_iou": 0.401611328125, + "eval_seeclick_loss_num": 0.0466766357421875, + "eval_seeclick_loss_xval": 0.2333984375, + "eval_seeclick_runtime": 76.7812, + "eval_seeclick_samples_per_second": 0.56, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 872628068, + "step": 9000 + }, + { + "epoch": 0.8799374266718811, + "eval_icons_CIoU": 0.6837756633758545, + "eval_icons_GIoU": 0.6833120882511139, + "eval_icons_IoU": 0.707092672586441, + "eval_icons_MAE_all": 0.052988117560744286, + "eval_icons_MAE_h": 0.052299827337265015, + "eval_icons_MAE_w": 0.05020219273865223, + "eval_icons_MAE_x": 0.052358150482177734, + "eval_icons_MAE_y": 0.05709228292107582, + "eval_icons_NUM_probability": 0.999999076128006, + "eval_icons_inside_bbox": 0.8628472089767456, + "eval_icons_loss": 0.17364905774593353, + "eval_icons_loss_ce": 1.2823687285390406e-05, + "eval_icons_loss_iou": 0.45166015625, + "eval_icons_loss_num": 0.039794921875, + "eval_icons_loss_xval": 0.19891357421875, + "eval_icons_runtime": 84.7612, + "eval_icons_samples_per_second": 0.59, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 872628068, + "step": 9000 + }, + { + "epoch": 0.8799374266718811, + "eval_screenspot_CIoU": 0.2673366243640582, + "eval_screenspot_GIoU": 0.23995538552602133, + "eval_screenspot_IoU": 0.3737157533566157, + "eval_screenspot_MAE_all": 0.19382684926191965, + "eval_screenspot_MAE_h": 0.14787261436382929, + "eval_screenspot_MAE_w": 0.251303568482399, + "eval_screenspot_MAE_x": 0.22229125102361044, + "eval_screenspot_MAE_y": 0.1538399582107862, + "eval_screenspot_NUM_probability": 0.9999930063883463, + "eval_screenspot_inside_bbox": 0.5958333412806193, + "eval_screenspot_loss": 0.640428364276886, + "eval_screenspot_loss_ce": 0.01753769100954135, + "eval_screenspot_loss_iou": 0.3201090494791667, + "eval_screenspot_loss_num": 0.12560526529947916, + "eval_screenspot_loss_xval": 0.6280110677083334, + "eval_screenspot_runtime": 150.4118, + "eval_screenspot_samples_per_second": 0.592, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 872628068, + "step": 9000 + }, + { + "epoch": 0.8799374266718811, + "eval_compot_CIoU": 0.46915118396282196, + "eval_compot_GIoU": 0.46309903264045715, + "eval_compot_IoU": 0.5302342474460602, + "eval_compot_MAE_all": 0.09504339098930359, + "eval_compot_MAE_h": 0.0753173828125, + "eval_compot_MAE_w": 0.11252738162875175, + "eval_compot_MAE_x": 0.11340628191828728, + "eval_compot_MAE_y": 0.07892252132296562, + "eval_compot_NUM_probability": 0.9999889433383942, + "eval_compot_inside_bbox": 0.7204861044883728, + "eval_compot_loss": 0.31651824712753296, + "eval_compot_loss_ce": 0.019215110689401627, + "eval_compot_loss_iou": 0.39813232421875, + "eval_compot_loss_num": 0.055515289306640625, + "eval_compot_loss_xval": 0.2774200439453125, + "eval_compot_runtime": 85.1137, + "eval_compot_samples_per_second": 0.587, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 872628068, + "step": 9000 + }, + { + "epoch": 0.8799374266718811, + "loss": 0.2554219663143158, + "loss_ce": 0.016133643686771393, + "loss_iou": 0.44140625, + "loss_num": 0.0478515625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 872628068, + "step": 9000 + }, + { + "epoch": 0.8800351974970668, + "grad_norm": 13.37419460349774, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 872725088, + "step": 9001 + }, + { + "epoch": 0.8800351974970668, + "loss": 0.09242529422044754, + "loss_ce": 0.00703711062669754, + "loss_iou": 0.318359375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 872725088, + "step": 9001 + }, + { + "epoch": 0.8801329683222526, + "grad_norm": 3.0710426621498716, + "learning_rate": 5e-05, + "loss": 0.056, + "num_input_tokens_seen": 872821976, + "step": 9002 + }, + { + "epoch": 0.8801329683222526, + "loss": 0.05486353486776352, + "loss_ce": 0.0028768437914550304, + "loss_iou": 0.24609375, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 872821976, + "step": 9002 + }, + { + "epoch": 0.8802307391474384, + "grad_norm": 7.347915649458831, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 872918288, + "step": 9003 + }, + { + "epoch": 0.8802307391474384, + "loss": 0.055073343217372894, + "loss_ce": 0.007664282340556383, + "loss_iou": 0.28125, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 872918288, + "step": 9003 + }, + { + "epoch": 0.8803285099726241, + "grad_norm": 10.725683654480687, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 873014716, + "step": 9004 + }, + { + "epoch": 0.8803285099726241, + "loss": 0.05961924046278, + "loss_ce": 0.006175331771373749, + "loss_iou": 0.1728515625, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 873014716, + "step": 9004 + }, + { + "epoch": 0.8804262807978099, + "grad_norm": 8.122387062649526, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 873111412, + "step": 9005 + }, + { + "epoch": 0.8804262807978099, + "loss": 0.06577378511428833, + "loss_ce": 0.005501571111381054, + "loss_iou": 0.30859375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 873111412, + "step": 9005 + }, + { + "epoch": 0.8805240516229957, + "grad_norm": 3.6681533853171118, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 873208876, + "step": 9006 + }, + { + "epoch": 0.8805240516229957, + "loss": 0.055159006267786026, + "loss_ce": 0.005850231274962425, + "loss_iou": 0.357421875, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 873208876, + "step": 9006 + }, + { + "epoch": 0.8806218224481814, + "grad_norm": 4.971503323443071, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 873305104, + "step": 9007 + }, + { + "epoch": 0.8806218224481814, + "loss": 0.05148209631443024, + "loss_ce": 0.0035542426630854607, + "loss_iou": 0.330078125, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 873305104, + "step": 9007 + }, + { + "epoch": 0.8807195932733672, + "grad_norm": 2.232840010204463, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 873401248, + "step": 9008 + }, + { + "epoch": 0.8807195932733672, + "loss": 0.05426327511668205, + "loss_ce": 0.004214446526020765, + "loss_iou": 0.208984375, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 873401248, + "step": 9008 + }, + { + "epoch": 0.880817364098553, + "grad_norm": 6.420763764642122, + "learning_rate": 5e-05, + "loss": 0.0758, + "num_input_tokens_seen": 873497532, + "step": 9009 + }, + { + "epoch": 0.880817364098553, + "loss": 0.07866260409355164, + "loss_ce": 0.009849284775555134, + "loss_iou": 0.193359375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 873497532, + "step": 9009 + }, + { + "epoch": 0.8809151349237387, + "grad_norm": 2.616555976074012, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 873594896, + "step": 9010 + }, + { + "epoch": 0.8809151349237387, + "loss": 0.06462310254573822, + "loss_ce": 0.008989561349153519, + "loss_iou": 0.298828125, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 873594896, + "step": 9010 + }, + { + "epoch": 0.8810129057489245, + "grad_norm": 6.88254827597488, + "learning_rate": 5e-05, + "loss": 0.0911, + "num_input_tokens_seen": 873692540, + "step": 9011 + }, + { + "epoch": 0.8810129057489245, + "loss": 0.0749247819185257, + "loss_ce": 0.00910599809139967, + "loss_iou": 0.2177734375, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 873692540, + "step": 9011 + }, + { + "epoch": 0.8811106765741102, + "grad_norm": 4.8424171394211255, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 873790272, + "step": 9012 + }, + { + "epoch": 0.8811106765741102, + "loss": 0.05914684757590294, + "loss_ce": 0.0049018519930541515, + "loss_iou": 0.32421875, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 873790272, + "step": 9012 + }, + { + "epoch": 0.881208447399296, + "grad_norm": 8.348968662050172, + "learning_rate": 5e-05, + "loss": 0.0526, + "num_input_tokens_seen": 873886248, + "step": 9013 + }, + { + "epoch": 0.881208447399296, + "loss": 0.044241875410079956, + "loss_ce": 0.0025777528062462807, + "loss_iou": 0.2099609375, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 873886248, + "step": 9013 + }, + { + "epoch": 0.8813062182244819, + "grad_norm": 11.933600531519298, + "learning_rate": 5e-05, + "loss": 0.056, + "num_input_tokens_seen": 873983312, + "step": 9014 + }, + { + "epoch": 0.8813062182244819, + "loss": 0.045668475329875946, + "loss_ce": 0.002150411019101739, + "loss_iou": 0.30078125, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 873983312, + "step": 9014 + }, + { + "epoch": 0.8814039890496675, + "grad_norm": 5.566607251033418, + "learning_rate": 5e-05, + "loss": 0.0594, + "num_input_tokens_seen": 874080304, + "step": 9015 + }, + { + "epoch": 0.8814039890496675, + "loss": 0.057936087250709534, + "loss_ce": 0.00426329905167222, + "loss_iou": 0.328125, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 874080304, + "step": 9015 + }, + { + "epoch": 0.8815017598748534, + "grad_norm": 5.325745698283937, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 874176288, + "step": 9016 + }, + { + "epoch": 0.8815017598748534, + "loss": 0.0685216560959816, + "loss_ce": 0.0063039446249604225, + "loss_iou": 0.2041015625, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 874176288, + "step": 9016 + }, + { + "epoch": 0.8815995307000392, + "grad_norm": 8.978639935293897, + "learning_rate": 5e-05, + "loss": 0.06, + "num_input_tokens_seen": 874273600, + "step": 9017 + }, + { + "epoch": 0.8815995307000392, + "loss": 0.04703234136104584, + "loss_ce": 0.0014695998979732394, + "loss_iou": 0.28125, + "loss_num": 0.00909423828125, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 874273600, + "step": 9017 + }, + { + "epoch": 0.8816973015252249, + "grad_norm": 4.352944579270848, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 874370304, + "step": 9018 + }, + { + "epoch": 0.8816973015252249, + "loss": 0.03367890417575836, + "loss_ce": 0.004115001298487186, + "loss_iou": 0.28125, + "loss_num": 0.00592041015625, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 874370304, + "step": 9018 + }, + { + "epoch": 0.8817950723504107, + "grad_norm": 5.7837610496264835, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 874467192, + "step": 9019 + }, + { + "epoch": 0.8817950723504107, + "loss": 0.057336121797561646, + "loss_ce": 0.004639896098524332, + "loss_iou": 0.228515625, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 874467192, + "step": 9019 + }, + { + "epoch": 0.8818928431755964, + "grad_norm": 7.4112806890525915, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 874564660, + "step": 9020 + }, + { + "epoch": 0.8818928431755964, + "loss": 0.10927732288837433, + "loss_ce": 0.0038238337729126215, + "loss_iou": 0.232421875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 874564660, + "step": 9020 + }, + { + "epoch": 0.8819906140007822, + "grad_norm": 16.58168002505847, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 874661800, + "step": 9021 + }, + { + "epoch": 0.8819906140007822, + "loss": 0.0475570410490036, + "loss_ce": 0.006365936249494553, + "loss_iou": 0.375, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 874661800, + "step": 9021 + }, + { + "epoch": 0.882088384825968, + "grad_norm": 6.478566429289257, + "learning_rate": 5e-05, + "loss": 0.1135, + "num_input_tokens_seen": 874757932, + "step": 9022 + }, + { + "epoch": 0.882088384825968, + "loss": 0.1224038377404213, + "loss_ce": 0.007215238641947508, + "loss_iou": 0.3359375, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 874757932, + "step": 9022 + }, + { + "epoch": 0.8821861556511537, + "grad_norm": 2.3666830651369435, + "learning_rate": 5e-05, + "loss": 0.1031, + "num_input_tokens_seen": 874854768, + "step": 9023 + }, + { + "epoch": 0.8821861556511537, + "loss": 0.07557296752929688, + "loss_ce": 0.004909513518214226, + "loss_iou": 0.2578125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 874854768, + "step": 9023 + }, + { + "epoch": 0.8822839264763395, + "grad_norm": 7.271081117572984, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 874951764, + "step": 9024 + }, + { + "epoch": 0.8822839264763395, + "loss": 0.0672137588262558, + "loss_ce": 0.0038287430070340633, + "loss_iou": 0.302734375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 874951764, + "step": 9024 + }, + { + "epoch": 0.8823816973015253, + "grad_norm": 3.255492971379892, + "learning_rate": 5e-05, + "loss": 0.0585, + "num_input_tokens_seen": 875048616, + "step": 9025 + }, + { + "epoch": 0.8823816973015253, + "loss": 0.0734775960445404, + "loss_ce": 0.009482236579060555, + "loss_iou": 0.2021484375, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 875048616, + "step": 9025 + }, + { + "epoch": 0.882479468126711, + "grad_norm": 15.16834422415193, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 875145184, + "step": 9026 + }, + { + "epoch": 0.882479468126711, + "loss": 0.0798543393611908, + "loss_ce": 0.004987084306776524, + "loss_iou": 0.24609375, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 875145184, + "step": 9026 + }, + { + "epoch": 0.8825772389518968, + "grad_norm": 6.828209526051557, + "learning_rate": 5e-05, + "loss": 0.0446, + "num_input_tokens_seen": 875242604, + "step": 9027 + }, + { + "epoch": 0.8825772389518968, + "loss": 0.04362775757908821, + "loss_ce": 0.003993053454905748, + "loss_iou": 0.220703125, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 875242604, + "step": 9027 + }, + { + "epoch": 0.8826750097770825, + "grad_norm": 3.8594970624237424, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 875339060, + "step": 9028 + }, + { + "epoch": 0.8826750097770825, + "loss": 0.06374700367450714, + "loss_ce": 0.005549983121454716, + "loss_iou": 0.2177734375, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 875339060, + "step": 9028 + }, + { + "epoch": 0.8827727806022683, + "grad_norm": 6.425687352018149, + "learning_rate": 5e-05, + "loss": 0.0685, + "num_input_tokens_seen": 875436256, + "step": 9029 + }, + { + "epoch": 0.8827727806022683, + "loss": 0.07051453739404678, + "loss_ce": 0.0058783055283129215, + "loss_iou": 0.3046875, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 875436256, + "step": 9029 + }, + { + "epoch": 0.8828705514274541, + "grad_norm": 3.8420597692815783, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 875532748, + "step": 9030 + }, + { + "epoch": 0.8828705514274541, + "loss": 0.045224763453006744, + "loss_ce": 0.003720856038853526, + "loss_iou": 0.224609375, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 875532748, + "step": 9030 + }, + { + "epoch": 0.8829683222526398, + "grad_norm": 9.279385668584453, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 875630232, + "step": 9031 + }, + { + "epoch": 0.8829683222526398, + "loss": 0.055097464472055435, + "loss_ce": 0.002881885040551424, + "loss_iou": 0.3828125, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 875630232, + "step": 9031 + }, + { + "epoch": 0.8830660930778256, + "grad_norm": 36.43407029866286, + "learning_rate": 5e-05, + "loss": 0.0974, + "num_input_tokens_seen": 875726704, + "step": 9032 + }, + { + "epoch": 0.8830660930778256, + "loss": 0.09423281252384186, + "loss_ce": 0.0075781517662107944, + "loss_iou": 0.330078125, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 875726704, + "step": 9032 + }, + { + "epoch": 0.8831638639030114, + "grad_norm": 8.75210431161939, + "learning_rate": 5e-05, + "loss": 0.0558, + "num_input_tokens_seen": 875823460, + "step": 9033 + }, + { + "epoch": 0.8831638639030114, + "loss": 0.0554642453789711, + "loss_ce": 0.00501105934381485, + "loss_iou": 0.287109375, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 875823460, + "step": 9033 + }, + { + "epoch": 0.8832616347281971, + "grad_norm": 6.895097667657099, + "learning_rate": 5e-05, + "loss": 0.0564, + "num_input_tokens_seen": 875920828, + "step": 9034 + }, + { + "epoch": 0.8832616347281971, + "loss": 0.07066395878791809, + "loss_ce": 0.007584121078252792, + "loss_iou": 0.30859375, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 875920828, + "step": 9034 + }, + { + "epoch": 0.8833594055533829, + "grad_norm": 11.097182374247481, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 876017132, + "step": 9035 + }, + { + "epoch": 0.8833594055533829, + "loss": 0.07096600532531738, + "loss_ce": 0.007733583450317383, + "loss_iou": 0.2373046875, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 876017132, + "step": 9035 + }, + { + "epoch": 0.8834571763785687, + "grad_norm": 1.9618334721319668, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 876114224, + "step": 9036 + }, + { + "epoch": 0.8834571763785687, + "loss": 0.07288196682929993, + "loss_ce": 0.004972721450030804, + "loss_iou": 0.26171875, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 876114224, + "step": 9036 + }, + { + "epoch": 0.8835549472037544, + "grad_norm": 12.910535620344112, + "learning_rate": 5e-05, + "loss": 0.1027, + "num_input_tokens_seen": 876211148, + "step": 9037 + }, + { + "epoch": 0.8835549472037544, + "loss": 0.11618606001138687, + "loss_ce": 0.007253565825521946, + "loss_iou": 0.25390625, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 876211148, + "step": 9037 + }, + { + "epoch": 0.8836527180289402, + "grad_norm": 2.494214786613293, + "learning_rate": 5e-05, + "loss": 0.0745, + "num_input_tokens_seen": 876307376, + "step": 9038 + }, + { + "epoch": 0.8836527180289402, + "loss": 0.07154453545808792, + "loss_ce": 0.015476117841899395, + "loss_iou": 0.1328125, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 876307376, + "step": 9038 + }, + { + "epoch": 0.8837504888541259, + "grad_norm": 8.226385131924093, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 876404412, + "step": 9039 + }, + { + "epoch": 0.8837504888541259, + "loss": 0.052253007888793945, + "loss_ce": 0.003852128516882658, + "loss_iou": 0.265625, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 876404412, + "step": 9039 + }, + { + "epoch": 0.8838482596793117, + "grad_norm": 6.988230007684901, + "learning_rate": 5e-05, + "loss": 0.0873, + "num_input_tokens_seen": 876502032, + "step": 9040 + }, + { + "epoch": 0.8838482596793117, + "loss": 0.07042776048183441, + "loss_ce": 0.004113059025257826, + "loss_iou": 0.3203125, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 876502032, + "step": 9040 + }, + { + "epoch": 0.8839460305044975, + "grad_norm": 4.960854001843045, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 876599204, + "step": 9041 + }, + { + "epoch": 0.8839460305044975, + "loss": 0.07004395127296448, + "loss_ce": 0.006735230330377817, + "loss_iou": 0.322265625, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 876599204, + "step": 9041 + }, + { + "epoch": 0.8840438013296832, + "grad_norm": 4.41620127565161, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 876696576, + "step": 9042 + }, + { + "epoch": 0.8840438013296832, + "loss": 0.06032402068376541, + "loss_ce": 0.003179857973009348, + "loss_iou": 0.279296875, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 876696576, + "step": 9042 + }, + { + "epoch": 0.884141572154869, + "grad_norm": 14.985786942593078, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 876793080, + "step": 9043 + }, + { + "epoch": 0.884141572154869, + "loss": 0.05524718761444092, + "loss_ce": 0.002360221929848194, + "loss_iou": 0.22265625, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 876793080, + "step": 9043 + }, + { + "epoch": 0.8842393429800548, + "grad_norm": 13.67568850043436, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 876891376, + "step": 9044 + }, + { + "epoch": 0.8842393429800548, + "loss": 0.08634201437234879, + "loss_ce": 0.00405899528414011, + "loss_iou": 0.34765625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 876891376, + "step": 9044 + }, + { + "epoch": 0.8843371138052405, + "grad_norm": 10.730208129891862, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 876988264, + "step": 9045 + }, + { + "epoch": 0.8843371138052405, + "loss": 0.08866658806800842, + "loss_ce": 0.004666950553655624, + "loss_iou": 0.31640625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 876988264, + "step": 9045 + }, + { + "epoch": 0.8844348846304263, + "grad_norm": 12.896895142015929, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 877085540, + "step": 9046 + }, + { + "epoch": 0.8844348846304263, + "loss": 0.09237857162952423, + "loss_ce": 0.0038012966979295015, + "loss_iou": 0.2236328125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 877085540, + "step": 9046 + }, + { + "epoch": 0.884532655455612, + "grad_norm": 6.949571436606808, + "learning_rate": 5e-05, + "loss": 0.0745, + "num_input_tokens_seen": 877182060, + "step": 9047 + }, + { + "epoch": 0.884532655455612, + "loss": 0.06558205187320709, + "loss_ce": 0.004852075129747391, + "loss_iou": 0.279296875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 877182060, + "step": 9047 + }, + { + "epoch": 0.8846304262807978, + "grad_norm": 8.62846987436938, + "learning_rate": 5e-05, + "loss": 0.0858, + "num_input_tokens_seen": 877279164, + "step": 9048 + }, + { + "epoch": 0.8846304262807978, + "loss": 0.07477585971355438, + "loss_ce": 0.004844825714826584, + "loss_iou": 0.267578125, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 877279164, + "step": 9048 + }, + { + "epoch": 0.8847281971059836, + "grad_norm": 8.167148142400835, + "learning_rate": 5e-05, + "loss": 0.0582, + "num_input_tokens_seen": 877376920, + "step": 9049 + }, + { + "epoch": 0.8847281971059836, + "loss": 0.03892466425895691, + "loss_ce": 0.0038447100669145584, + "loss_iou": 0.2177734375, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 877376920, + "step": 9049 + }, + { + "epoch": 0.8848259679311693, + "grad_norm": 5.326672906811688, + "learning_rate": 5e-05, + "loss": 0.046, + "num_input_tokens_seen": 877473092, + "step": 9050 + }, + { + "epoch": 0.8848259679311693, + "loss": 0.0563371405005455, + "loss_ce": 0.007875227369368076, + "loss_iou": 0.244140625, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 877473092, + "step": 9050 + }, + { + "epoch": 0.8849237387563551, + "grad_norm": 7.57035539531014, + "learning_rate": 5e-05, + "loss": 0.0862, + "num_input_tokens_seen": 877570424, + "step": 9051 + }, + { + "epoch": 0.8849237387563551, + "loss": 0.10027700662612915, + "loss_ce": 0.0030480031855404377, + "loss_iou": 0.3515625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 877570424, + "step": 9051 + }, + { + "epoch": 0.8850215095815409, + "grad_norm": 13.966833055380098, + "learning_rate": 5e-05, + "loss": 0.112, + "num_input_tokens_seen": 877667952, + "step": 9052 + }, + { + "epoch": 0.8850215095815409, + "loss": 0.11266930401325226, + "loss_ce": 0.01089317724108696, + "loss_iou": 0.4765625, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 877667952, + "step": 9052 + }, + { + "epoch": 0.8851192804067266, + "grad_norm": 14.599549074997627, + "learning_rate": 5e-05, + "loss": 0.0858, + "num_input_tokens_seen": 877765520, + "step": 9053 + }, + { + "epoch": 0.8851192804067266, + "loss": 0.12675490975379944, + "loss_ce": 0.008072040043771267, + "loss_iou": 0.302734375, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 877765520, + "step": 9053 + }, + { + "epoch": 0.8852170512319124, + "grad_norm": 6.504527432680599, + "learning_rate": 5e-05, + "loss": 0.0843, + "num_input_tokens_seen": 877863792, + "step": 9054 + }, + { + "epoch": 0.8852170512319124, + "loss": 0.0889083594083786, + "loss_ce": 0.004862949252128601, + "loss_iou": 0.388671875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 877863792, + "step": 9054 + }, + { + "epoch": 0.8853148220570982, + "grad_norm": 8.559829942814474, + "learning_rate": 5e-05, + "loss": 0.0879, + "num_input_tokens_seen": 877960656, + "step": 9055 + }, + { + "epoch": 0.8853148220570982, + "loss": 0.08706948161125183, + "loss_ce": 0.003115621395409107, + "loss_iou": 0.23828125, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 877960656, + "step": 9055 + }, + { + "epoch": 0.8854125928822839, + "grad_norm": 6.479364002173616, + "learning_rate": 5e-05, + "loss": 0.0917, + "num_input_tokens_seen": 878057440, + "step": 9056 + }, + { + "epoch": 0.8854125928822839, + "loss": 0.0989062637090683, + "loss_ce": 0.003432023571804166, + "loss_iou": 0.267578125, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 878057440, + "step": 9056 + }, + { + "epoch": 0.8855103637074697, + "grad_norm": 9.122370795025322, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 878153464, + "step": 9057 + }, + { + "epoch": 0.8855103637074697, + "loss": 0.05479203909635544, + "loss_ce": 0.005208604037761688, + "loss_iou": 0.173828125, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 878153464, + "step": 9057 + }, + { + "epoch": 0.8856081345326554, + "grad_norm": 3.0255425874987916, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 878250512, + "step": 9058 + }, + { + "epoch": 0.8856081345326554, + "loss": 0.04093621298670769, + "loss_ce": 0.004200678318738937, + "loss_iou": 0.33203125, + "loss_num": 0.007354736328125, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 878250512, + "step": 9058 + }, + { + "epoch": 0.8857059053578412, + "grad_norm": 4.2661554989805985, + "learning_rate": 5e-05, + "loss": 0.0595, + "num_input_tokens_seen": 878348284, + "step": 9059 + }, + { + "epoch": 0.8857059053578412, + "loss": 0.06096753105521202, + "loss_ce": 0.008629885502159595, + "loss_iou": 0.35546875, + "loss_num": 0.010498046875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 878348284, + "step": 9059 + }, + { + "epoch": 0.885803676183027, + "grad_norm": 15.183759176305092, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 878443672, + "step": 9060 + }, + { + "epoch": 0.885803676183027, + "loss": 0.06664621829986572, + "loss_ce": 0.006217604968696833, + "loss_iou": 0.2041015625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 878443672, + "step": 9060 + }, + { + "epoch": 0.8859014470082127, + "grad_norm": 11.80578118687731, + "learning_rate": 5e-05, + "loss": 0.0817, + "num_input_tokens_seen": 878541104, + "step": 9061 + }, + { + "epoch": 0.8859014470082127, + "loss": 0.07230322062969208, + "loss_ce": 0.00448553916066885, + "loss_iou": 0.21484375, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 878541104, + "step": 9061 + }, + { + "epoch": 0.8859992178333985, + "grad_norm": 15.616992559805354, + "learning_rate": 5e-05, + "loss": 0.0685, + "num_input_tokens_seen": 878638416, + "step": 9062 + }, + { + "epoch": 0.8859992178333985, + "loss": 0.06459279358386993, + "loss_ce": 0.009432275779545307, + "loss_iou": 0.333984375, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 878638416, + "step": 9062 + }, + { + "epoch": 0.8860969886585843, + "grad_norm": 20.498982070296424, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 878734392, + "step": 9063 + }, + { + "epoch": 0.8860969886585843, + "loss": 0.09901884943246841, + "loss_ce": 0.004994190763682127, + "loss_iou": 0.1533203125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 878734392, + "step": 9063 + }, + { + "epoch": 0.88619475948377, + "grad_norm": 4.767706674197943, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 878830416, + "step": 9064 + }, + { + "epoch": 0.88619475948377, + "loss": 0.057817842811346054, + "loss_ce": 0.010530858300626278, + "loss_iou": 0.2138671875, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 878830416, + "step": 9064 + }, + { + "epoch": 0.8862925303089558, + "grad_norm": 7.240788755875435, + "learning_rate": 5e-05, + "loss": 0.0655, + "num_input_tokens_seen": 878927452, + "step": 9065 + }, + { + "epoch": 0.8862925303089558, + "loss": 0.0770341008901596, + "loss_ce": 0.007103067822754383, + "loss_iou": 0.3046875, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 878927452, + "step": 9065 + }, + { + "epoch": 0.8863903011341415, + "grad_norm": 4.112008906239038, + "learning_rate": 5e-05, + "loss": 0.0473, + "num_input_tokens_seen": 879024484, + "step": 9066 + }, + { + "epoch": 0.8863903011341415, + "loss": 0.043052393943071365, + "loss_ce": 0.005881982855498791, + "loss_iou": 0.333984375, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 879024484, + "step": 9066 + }, + { + "epoch": 0.8864880719593273, + "grad_norm": 14.44342443641698, + "learning_rate": 5e-05, + "loss": 0.1321, + "num_input_tokens_seen": 879122012, + "step": 9067 + }, + { + "epoch": 0.8864880719593273, + "loss": 0.14386138319969177, + "loss_ce": 0.010377495549619198, + "loss_iou": 0.330078125, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 879122012, + "step": 9067 + }, + { + "epoch": 0.8865858427845131, + "grad_norm": 8.522363946460494, + "learning_rate": 5e-05, + "loss": 0.0539, + "num_input_tokens_seen": 879219060, + "step": 9068 + }, + { + "epoch": 0.8865858427845131, + "loss": 0.035094793885946274, + "loss_ce": 0.005012089386582375, + "loss_iou": 0.337890625, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 879219060, + "step": 9068 + }, + { + "epoch": 0.8866836136096988, + "grad_norm": 16.820804953093702, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 879316600, + "step": 9069 + }, + { + "epoch": 0.8866836136096988, + "loss": 0.06974456459283829, + "loss_ce": 0.007030940614640713, + "loss_iou": 0.298828125, + "loss_num": 0.0125732421875, + "loss_xval": 0.0625, + "num_input_tokens_seen": 879316600, + "step": 9069 + }, + { + "epoch": 0.8867813844348846, + "grad_norm": 17.875312427486488, + "learning_rate": 5e-05, + "loss": 0.0938, + "num_input_tokens_seen": 879413920, + "step": 9070 + }, + { + "epoch": 0.8867813844348846, + "loss": 0.09450700879096985, + "loss_ce": 0.0029237577691674232, + "loss_iou": 0.380859375, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 879413920, + "step": 9070 + }, + { + "epoch": 0.8868791552600704, + "grad_norm": 16.30934298352895, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 879510036, + "step": 9071 + }, + { + "epoch": 0.8868791552600704, + "loss": 0.0622406005859375, + "loss_ce": 0.006530760787427425, + "loss_iou": 0.2001953125, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 879510036, + "step": 9071 + }, + { + "epoch": 0.8869769260852561, + "grad_norm": 3.195859313356122, + "learning_rate": 5e-05, + "loss": 0.0769, + "num_input_tokens_seen": 879606908, + "step": 9072 + }, + { + "epoch": 0.8869769260852561, + "loss": 0.06586942076683044, + "loss_ce": 0.00610074819996953, + "loss_iou": 0.2392578125, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 879606908, + "step": 9072 + }, + { + "epoch": 0.8870746969104419, + "grad_norm": 7.638511512606573, + "learning_rate": 5e-05, + "loss": 0.0889, + "num_input_tokens_seen": 879702904, + "step": 9073 + }, + { + "epoch": 0.8870746969104419, + "loss": 0.07982359826564789, + "loss_ce": 0.006245715543627739, + "loss_iou": 0.255859375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 879702904, + "step": 9073 + }, + { + "epoch": 0.8871724677356276, + "grad_norm": 5.30588198572803, + "learning_rate": 5e-05, + "loss": 0.0644, + "num_input_tokens_seen": 879800628, + "step": 9074 + }, + { + "epoch": 0.8871724677356276, + "loss": 0.045798055827617645, + "loss_ce": 0.0029437460470944643, + "loss_iou": 0.259765625, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 879800628, + "step": 9074 + }, + { + "epoch": 0.8872702385608134, + "grad_norm": 8.198448818492906, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 879897408, + "step": 9075 + }, + { + "epoch": 0.8872702385608134, + "loss": 0.12601864337921143, + "loss_ce": 0.0054703992791473866, + "loss_iou": 0.28125, + "loss_num": 0.0240478515625, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 879897408, + "step": 9075 + }, + { + "epoch": 0.8873680093859992, + "grad_norm": 12.394108451889362, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 879995144, + "step": 9076 + }, + { + "epoch": 0.8873680093859992, + "loss": 0.07759854197502136, + "loss_ce": 0.004585243761539459, + "loss_iou": 0.30078125, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 879995144, + "step": 9076 + }, + { + "epoch": 0.8874657802111849, + "grad_norm": 4.640233173517043, + "learning_rate": 5e-05, + "loss": 0.0619, + "num_input_tokens_seen": 880092868, + "step": 9077 + }, + { + "epoch": 0.8874657802111849, + "loss": 0.07056770473718643, + "loss_ce": 0.006206137593835592, + "loss_iou": 0.30859375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 880092868, + "step": 9077 + }, + { + "epoch": 0.8875635510363707, + "grad_norm": 3.831270310385665, + "learning_rate": 5e-05, + "loss": 0.1057, + "num_input_tokens_seen": 880189836, + "step": 9078 + }, + { + "epoch": 0.8875635510363707, + "loss": 0.10045640170574188, + "loss_ce": 0.006889511365443468, + "loss_iou": 0.33203125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 880189836, + "step": 9078 + }, + { + "epoch": 0.8876613218615566, + "grad_norm": 1.6751333530602737, + "learning_rate": 5e-05, + "loss": 0.0626, + "num_input_tokens_seen": 880286092, + "step": 9079 + }, + { + "epoch": 0.8876613218615566, + "loss": 0.02994629181921482, + "loss_ce": 0.002800907241180539, + "loss_iou": 0.236328125, + "loss_num": 0.00543212890625, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 880286092, + "step": 9079 + }, + { + "epoch": 0.8877590926867422, + "grad_norm": 4.815659490397577, + "learning_rate": 5e-05, + "loss": 0.0508, + "num_input_tokens_seen": 880383248, + "step": 9080 + }, + { + "epoch": 0.8877590926867422, + "loss": 0.06110767647624016, + "loss_ce": 0.004528086166828871, + "loss_iou": 0.328125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 880383248, + "step": 9080 + }, + { + "epoch": 0.887856863511928, + "grad_norm": 2.9992641944595113, + "learning_rate": 5e-05, + "loss": 0.0475, + "num_input_tokens_seen": 880480136, + "step": 9081 + }, + { + "epoch": 0.887856863511928, + "loss": 0.0369856134057045, + "loss_ce": 0.004610277246683836, + "loss_iou": 0.330078125, + "loss_num": 0.0064697265625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 880480136, + "step": 9081 + }, + { + "epoch": 0.8879546343371139, + "grad_norm": 12.457581193599703, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 880576812, + "step": 9082 + }, + { + "epoch": 0.8879546343371139, + "loss": 0.06036879122257233, + "loss_ce": 0.004575030878186226, + "loss_iou": 0.279296875, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 880576812, + "step": 9082 + }, + { + "epoch": 0.8880524051622996, + "grad_norm": 12.569571611145328, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 880673496, + "step": 9083 + }, + { + "epoch": 0.8880524051622996, + "loss": 0.06382140517234802, + "loss_ce": 0.006906123831868172, + "loss_iou": 0.32421875, + "loss_num": 0.01141357421875, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 880673496, + "step": 9083 + }, + { + "epoch": 0.8881501759874854, + "grad_norm": 4.553024796059629, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 880770920, + "step": 9084 + }, + { + "epoch": 0.8881501759874854, + "loss": 0.08267722278833389, + "loss_ce": 0.005666111595928669, + "loss_iou": 0.31640625, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 880770920, + "step": 9084 + }, + { + "epoch": 0.888247946812671, + "grad_norm": 3.678686757378188, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 880868768, + "step": 9085 + }, + { + "epoch": 0.888247946812671, + "loss": 0.10049278289079666, + "loss_ce": 0.0013411719119176269, + "loss_iou": 0.25, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 880868768, + "step": 9085 + }, + { + "epoch": 0.8883457176378569, + "grad_norm": 14.95832636501104, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 880964588, + "step": 9086 + }, + { + "epoch": 0.8883457176378569, + "loss": 0.04991082102060318, + "loss_ce": 0.002806940581649542, + "loss_iou": 0.1728515625, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 880964588, + "step": 9086 + }, + { + "epoch": 0.8884434884630427, + "grad_norm": 6.8216060377282846, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 881062132, + "step": 9087 + }, + { + "epoch": 0.8884434884630427, + "loss": 0.09634556621313095, + "loss_ce": 0.009278913959860802, + "loss_iou": 0.3203125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 881062132, + "step": 9087 + }, + { + "epoch": 0.8885412592882284, + "grad_norm": 17.120207913567768, + "learning_rate": 5e-05, + "loss": 0.052, + "num_input_tokens_seen": 881159212, + "step": 9088 + }, + { + "epoch": 0.8885412592882284, + "loss": 0.06992590427398682, + "loss_ce": 0.0011049446184188128, + "loss_iou": 0.291015625, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 881159212, + "step": 9088 + }, + { + "epoch": 0.8886390301134142, + "grad_norm": 6.5808908007770315, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 881256092, + "step": 9089 + }, + { + "epoch": 0.8886390301134142, + "loss": 0.06010804697871208, + "loss_ce": 0.00572572136297822, + "loss_iou": 0.291015625, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 881256092, + "step": 9089 + }, + { + "epoch": 0.8887368009386, + "grad_norm": 5.933103376457012, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 881352736, + "step": 9090 + }, + { + "epoch": 0.8887368009386, + "loss": 0.07653269916772842, + "loss_ce": 0.003427840769290924, + "loss_iou": 0.296875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 881352736, + "step": 9090 + }, + { + "epoch": 0.8888345717637857, + "grad_norm": 11.216314716423492, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 881449616, + "step": 9091 + }, + { + "epoch": 0.8888345717637857, + "loss": 0.10004591941833496, + "loss_ce": 0.004632715135812759, + "loss_iou": 0.2734375, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 881449616, + "step": 9091 + }, + { + "epoch": 0.8889323425889715, + "grad_norm": 7.525229115010099, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 881545080, + "step": 9092 + }, + { + "epoch": 0.8889323425889715, + "loss": 0.12353268265724182, + "loss_ce": 0.008878146298229694, + "loss_iou": 0.265625, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 881545080, + "step": 9092 + }, + { + "epoch": 0.8890301134141572, + "grad_norm": 3.038611266164885, + "learning_rate": 5e-05, + "loss": 0.078, + "num_input_tokens_seen": 881642832, + "step": 9093 + }, + { + "epoch": 0.8890301134141572, + "loss": 0.07413817942142487, + "loss_ce": 0.0032458482310175896, + "loss_iou": 0.333984375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 881642832, + "step": 9093 + }, + { + "epoch": 0.889127884239343, + "grad_norm": 3.1851304311409785, + "learning_rate": 5e-05, + "loss": 0.0717, + "num_input_tokens_seen": 881739232, + "step": 9094 + }, + { + "epoch": 0.889127884239343, + "loss": 0.054953522980213165, + "loss_ce": 0.002371737267822027, + "loss_iou": 0.23828125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 881739232, + "step": 9094 + }, + { + "epoch": 0.8892256550645288, + "grad_norm": 16.36976692718486, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 881836200, + "step": 9095 + }, + { + "epoch": 0.8892256550645288, + "loss": 0.07896432280540466, + "loss_ce": 0.005401703994721174, + "loss_iou": 0.32421875, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 881836200, + "step": 9095 + }, + { + "epoch": 0.8893234258897145, + "grad_norm": 11.089304707032717, + "learning_rate": 5e-05, + "loss": 0.0775, + "num_input_tokens_seen": 881933984, + "step": 9096 + }, + { + "epoch": 0.8893234258897145, + "loss": 0.0642298087477684, + "loss_ce": 0.006627882365137339, + "loss_iou": 0.365234375, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 881933984, + "step": 9096 + }, + { + "epoch": 0.8894211967149003, + "grad_norm": 8.451079987519975, + "learning_rate": 5e-05, + "loss": 0.1079, + "num_input_tokens_seen": 882031120, + "step": 9097 + }, + { + "epoch": 0.8894211967149003, + "loss": 0.09430970996618271, + "loss_ce": 0.0026196481194347143, + "loss_iou": 0.34765625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 882031120, + "step": 9097 + }, + { + "epoch": 0.8895189675400861, + "grad_norm": 9.916526226730088, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 882128060, + "step": 9098 + }, + { + "epoch": 0.8895189675400861, + "loss": 0.09120843559503555, + "loss_ce": 0.007048585917800665, + "loss_iou": 0.26171875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 882128060, + "step": 9098 + }, + { + "epoch": 0.8896167383652718, + "grad_norm": 3.9606831186363274, + "learning_rate": 5e-05, + "loss": 0.0918, + "num_input_tokens_seen": 882224916, + "step": 9099 + }, + { + "epoch": 0.8896167383652718, + "loss": 0.1302284151315689, + "loss_ce": 0.00892103835940361, + "loss_iou": 0.26171875, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 882224916, + "step": 9099 + }, + { + "epoch": 0.8897145091904576, + "grad_norm": 3.99445508279149, + "learning_rate": 5e-05, + "loss": 0.0861, + "num_input_tokens_seen": 882321960, + "step": 9100 + }, + { + "epoch": 0.8897145091904576, + "loss": 0.10388132929801941, + "loss_ce": 0.004363498650491238, + "loss_iou": 0.228515625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 882321960, + "step": 9100 + }, + { + "epoch": 0.8898122800156434, + "grad_norm": 14.081428373756722, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 882418736, + "step": 9101 + }, + { + "epoch": 0.8898122800156434, + "loss": 0.07468263804912567, + "loss_ce": 0.0059265391901135445, + "loss_iou": 0.37890625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 882418736, + "step": 9101 + }, + { + "epoch": 0.8899100508408291, + "grad_norm": 6.981745198328329, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 882515188, + "step": 9102 + }, + { + "epoch": 0.8899100508408291, + "loss": 0.053749725222587585, + "loss_ce": 0.002922698389738798, + "loss_iou": 0.296875, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 882515188, + "step": 9102 + }, + { + "epoch": 0.8900078216660149, + "grad_norm": 6.9344458188334555, + "learning_rate": 5e-05, + "loss": 0.0862, + "num_input_tokens_seen": 882612660, + "step": 9103 + }, + { + "epoch": 0.8900078216660149, + "loss": 0.07676536589860916, + "loss_ce": 0.012541125528514385, + "loss_iou": 0.404296875, + "loss_num": 0.0128173828125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 882612660, + "step": 9103 + }, + { + "epoch": 0.8901055924912006, + "grad_norm": 11.570585833728938, + "learning_rate": 5e-05, + "loss": 0.0439, + "num_input_tokens_seen": 882709388, + "step": 9104 + }, + { + "epoch": 0.8901055924912006, + "loss": 0.04692215472459793, + "loss_ce": 0.001725619425997138, + "loss_iou": 0.3359375, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 882709388, + "step": 9104 + }, + { + "epoch": 0.8902033633163864, + "grad_norm": 6.548721117334722, + "learning_rate": 5e-05, + "loss": 0.0858, + "num_input_tokens_seen": 882806340, + "step": 9105 + }, + { + "epoch": 0.8902033633163864, + "loss": 0.10178855061531067, + "loss_ce": 0.006268529687076807, + "loss_iou": 0.28125, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 882806340, + "step": 9105 + }, + { + "epoch": 0.8903011341415722, + "grad_norm": 5.740788230988233, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 882903060, + "step": 9106 + }, + { + "epoch": 0.8903011341415722, + "loss": 0.05215463787317276, + "loss_ce": 0.0019074457231909037, + "loss_iou": 0.2578125, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 882903060, + "step": 9106 + }, + { + "epoch": 0.8903989049667579, + "grad_norm": 6.202251113633384, + "learning_rate": 5e-05, + "loss": 0.0658, + "num_input_tokens_seen": 882999820, + "step": 9107 + }, + { + "epoch": 0.8903989049667579, + "loss": 0.09642034769058228, + "loss_ce": 0.0107536930590868, + "loss_iou": 0.2734375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 882999820, + "step": 9107 + }, + { + "epoch": 0.8904966757919437, + "grad_norm": 3.59385339234183, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 883095892, + "step": 9108 + }, + { + "epoch": 0.8904966757919437, + "loss": 0.060034967958927155, + "loss_ce": 0.006484245415776968, + "loss_iou": 0.2578125, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 883095892, + "step": 9108 + }, + { + "epoch": 0.8905944466171295, + "grad_norm": 7.85935025109042, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 883193632, + "step": 9109 + }, + { + "epoch": 0.8905944466171295, + "loss": 0.07700648903846741, + "loss_ce": 0.0057174209505319595, + "loss_iou": 0.3046875, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 883193632, + "step": 9109 + }, + { + "epoch": 0.8906922174423152, + "grad_norm": 23.404572484561893, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 883291316, + "step": 9110 + }, + { + "epoch": 0.8906922174423152, + "loss": 0.047729071229696274, + "loss_ce": 0.0021815854124724865, + "loss_iou": 0.35546875, + "loss_num": 0.00909423828125, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 883291316, + "step": 9110 + }, + { + "epoch": 0.890789988267501, + "grad_norm": 17.364756175401652, + "learning_rate": 5e-05, + "loss": 0.0848, + "num_input_tokens_seen": 883387568, + "step": 9111 + }, + { + "epoch": 0.890789988267501, + "loss": 0.10389793664216995, + "loss_ce": 0.005509264301508665, + "loss_iou": 0.25390625, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 883387568, + "step": 9111 + }, + { + "epoch": 0.8908877590926867, + "grad_norm": 7.434448175269457, + "learning_rate": 5e-05, + "loss": 0.0932, + "num_input_tokens_seen": 883484200, + "step": 9112 + }, + { + "epoch": 0.8908877590926867, + "loss": 0.10519301146268845, + "loss_ce": 0.007002702448517084, + "loss_iou": 0.17578125, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 883484200, + "step": 9112 + }, + { + "epoch": 0.8909855299178725, + "grad_norm": 3.1146210891069086, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 883580788, + "step": 9113 + }, + { + "epoch": 0.8909855299178725, + "loss": 0.08130329847335815, + "loss_ce": 0.0046965498477220535, + "loss_iou": 0.32421875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 883580788, + "step": 9113 + }, + { + "epoch": 0.8910833007430583, + "grad_norm": 4.094212172873299, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 883676500, + "step": 9114 + }, + { + "epoch": 0.8910833007430583, + "loss": 0.1176006942987442, + "loss_ce": 0.005555412732064724, + "loss_iou": 0.21484375, + "loss_num": 0.0224609375, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 883676500, + "step": 9114 + }, + { + "epoch": 0.891181071568244, + "grad_norm": 7.83535398458795, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 883774568, + "step": 9115 + }, + { + "epoch": 0.891181071568244, + "loss": 0.09368059039115906, + "loss_ce": 0.008879872038960457, + "loss_iou": 0.384765625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 883774568, + "step": 9115 + }, + { + "epoch": 0.8912788423934298, + "grad_norm": 7.616701184414545, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 883871460, + "step": 9116 + }, + { + "epoch": 0.8912788423934298, + "loss": 0.08082420378923416, + "loss_ce": 0.006117171607911587, + "loss_iou": 0.271484375, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 883871460, + "step": 9116 + }, + { + "epoch": 0.8913766132186156, + "grad_norm": 7.896623703904051, + "learning_rate": 5e-05, + "loss": 0.0597, + "num_input_tokens_seen": 883968160, + "step": 9117 + }, + { + "epoch": 0.8913766132186156, + "loss": 0.07002446055412292, + "loss_ce": 0.004762617405503988, + "loss_iou": 0.216796875, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 883968160, + "step": 9117 + }, + { + "epoch": 0.8914743840438013, + "grad_norm": 17.981762168949754, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 884065336, + "step": 9118 + }, + { + "epoch": 0.8914743840438013, + "loss": 0.05927865952253342, + "loss_ce": 0.0059949662536382675, + "loss_iou": 0.330078125, + "loss_num": 0.01068115234375, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 884065336, + "step": 9118 + }, + { + "epoch": 0.8915721548689871, + "grad_norm": 9.99274114284679, + "learning_rate": 5e-05, + "loss": 0.0729, + "num_input_tokens_seen": 884163560, + "step": 9119 + }, + { + "epoch": 0.8915721548689871, + "loss": 0.07899090647697449, + "loss_ce": 0.0064811366610229015, + "loss_iou": 0.310546875, + "loss_num": 0.0145263671875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 884163560, + "step": 9119 + }, + { + "epoch": 0.8916699256941728, + "grad_norm": 11.72553868440969, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 884260904, + "step": 9120 + }, + { + "epoch": 0.8916699256941728, + "loss": 0.07324471324682236, + "loss_ce": 0.0054193963296711445, + "loss_iou": 0.3671875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 884260904, + "step": 9120 + }, + { + "epoch": 0.8917676965193586, + "grad_norm": 5.667929865219063, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 884356772, + "step": 9121 + }, + { + "epoch": 0.8917676965193586, + "loss": 0.08776839077472687, + "loss_ce": 0.006542038172483444, + "loss_iou": 0.185546875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 884356772, + "step": 9121 + }, + { + "epoch": 0.8918654673445444, + "grad_norm": 14.407760184640338, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 884453368, + "step": 9122 + }, + { + "epoch": 0.8918654673445444, + "loss": 0.06341496855020523, + "loss_ce": 0.006553094834089279, + "loss_iou": 0.279296875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 884453368, + "step": 9122 + }, + { + "epoch": 0.8919632381697301, + "grad_norm": 4.389533741637728, + "learning_rate": 5e-05, + "loss": 0.0651, + "num_input_tokens_seen": 884550612, + "step": 9123 + }, + { + "epoch": 0.8919632381697301, + "loss": 0.05599469691514969, + "loss_ce": 0.010496805422008038, + "loss_iou": 0.2470703125, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 884550612, + "step": 9123 + }, + { + "epoch": 0.8920610089949159, + "grad_norm": 3.6891709175169605, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 884647700, + "step": 9124 + }, + { + "epoch": 0.8920610089949159, + "loss": 0.057891689240932465, + "loss_ce": 0.0026243545580655336, + "loss_iou": 0.353515625, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 884647700, + "step": 9124 + }, + { + "epoch": 0.8921587798201017, + "grad_norm": 5.72049356392082, + "learning_rate": 5e-05, + "loss": 0.0475, + "num_input_tokens_seen": 884744304, + "step": 9125 + }, + { + "epoch": 0.8921587798201017, + "loss": 0.04799434170126915, + "loss_ce": 0.0072075966745615005, + "loss_iou": 0.302734375, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 884744304, + "step": 9125 + }, + { + "epoch": 0.8922565506452874, + "grad_norm": 4.814541678099384, + "learning_rate": 5e-05, + "loss": 0.0537, + "num_input_tokens_seen": 884841068, + "step": 9126 + }, + { + "epoch": 0.8922565506452874, + "loss": 0.0805777907371521, + "loss_ce": 0.0089682936668396, + "loss_iou": 0.3359375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 884841068, + "step": 9126 + }, + { + "epoch": 0.8923543214704732, + "grad_norm": 6.530285817248956, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 884937668, + "step": 9127 + }, + { + "epoch": 0.8923543214704732, + "loss": 0.07484059035778046, + "loss_ce": 0.0032158279791474342, + "loss_iou": 0.2216796875, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 884937668, + "step": 9127 + }, + { + "epoch": 0.892452092295659, + "grad_norm": 3.9964124057806756, + "learning_rate": 5e-05, + "loss": 0.0478, + "num_input_tokens_seen": 885034536, + "step": 9128 + }, + { + "epoch": 0.892452092295659, + "loss": 0.04597883298993111, + "loss_ce": 0.008076000958681107, + "loss_iou": 0.310546875, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 885034536, + "step": 9128 + }, + { + "epoch": 0.8925498631208447, + "grad_norm": 7.6499772463493, + "learning_rate": 5e-05, + "loss": 0.071, + "num_input_tokens_seen": 885130988, + "step": 9129 + }, + { + "epoch": 0.8925498631208447, + "loss": 0.06899592280387878, + "loss_ce": 0.006770584732294083, + "loss_iou": 0.2490234375, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 885130988, + "step": 9129 + }, + { + "epoch": 0.8926476339460305, + "grad_norm": 2.5245566790291485, + "learning_rate": 5e-05, + "loss": 0.0655, + "num_input_tokens_seen": 885227520, + "step": 9130 + }, + { + "epoch": 0.8926476339460305, + "loss": 0.050818100571632385, + "loss_ce": 0.003035205416381359, + "loss_iou": 0.255859375, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 885227520, + "step": 9130 + }, + { + "epoch": 0.8927454047712162, + "grad_norm": 8.19445765648583, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 885324056, + "step": 9131 + }, + { + "epoch": 0.8927454047712162, + "loss": 0.06399727612733841, + "loss_ce": 0.0028705699369311333, + "loss_iou": 0.283203125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 885324056, + "step": 9131 + }, + { + "epoch": 0.892843175596402, + "grad_norm": 16.353070700633136, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 885420436, + "step": 9132 + }, + { + "epoch": 0.892843175596402, + "loss": 0.06855195760726929, + "loss_ce": 0.010438861325383186, + "loss_iou": 0.2734375, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 885420436, + "step": 9132 + }, + { + "epoch": 0.8929409464215878, + "grad_norm": 4.814032817516473, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 885518376, + "step": 9133 + }, + { + "epoch": 0.8929409464215878, + "loss": 0.09147492796182632, + "loss_ce": 0.004057331010699272, + "loss_iou": 0.296875, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 885518376, + "step": 9133 + }, + { + "epoch": 0.8930387172467735, + "grad_norm": 2.6626540205956597, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 885614948, + "step": 9134 + }, + { + "epoch": 0.8930387172467735, + "loss": 0.07593664526939392, + "loss_ce": 0.0051053473725914955, + "loss_iou": 0.248046875, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 885614948, + "step": 9134 + }, + { + "epoch": 0.8931364880719593, + "grad_norm": 8.007132370173357, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 885711692, + "step": 9135 + }, + { + "epoch": 0.8931364880719593, + "loss": 0.05845683068037033, + "loss_ce": 0.00810282677412033, + "loss_iou": 0.2197265625, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 885711692, + "step": 9135 + }, + { + "epoch": 0.8932342588971451, + "grad_norm": 16.663182185764754, + "learning_rate": 5e-05, + "loss": 0.0501, + "num_input_tokens_seen": 885808756, + "step": 9136 + }, + { + "epoch": 0.8932342588971451, + "loss": 0.06000567227602005, + "loss_ce": 0.006111632101237774, + "loss_iou": 0.330078125, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 885808756, + "step": 9136 + }, + { + "epoch": 0.8933320297223308, + "grad_norm": 9.978187745201016, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 885906268, + "step": 9137 + }, + { + "epoch": 0.8933320297223308, + "loss": 0.06012117117643356, + "loss_ce": 0.006990064866840839, + "loss_iou": 0.384765625, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 885906268, + "step": 9137 + }, + { + "epoch": 0.8934298005475166, + "grad_norm": 6.171636360328656, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 886002040, + "step": 9138 + }, + { + "epoch": 0.8934298005475166, + "loss": 0.052806466817855835, + "loss_ce": 0.005618658848106861, + "loss_iou": 0.33203125, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 886002040, + "step": 9138 + }, + { + "epoch": 0.8935275713727023, + "grad_norm": 7.33652262705563, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 886098856, + "step": 9139 + }, + { + "epoch": 0.8935275713727023, + "loss": 0.06924450397491455, + "loss_ce": 0.006027342285960913, + "loss_iou": 0.37890625, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 886098856, + "step": 9139 + }, + { + "epoch": 0.8936253421978881, + "grad_norm": 2.8613605495498, + "learning_rate": 5e-05, + "loss": 0.0573, + "num_input_tokens_seen": 886196328, + "step": 9140 + }, + { + "epoch": 0.8936253421978881, + "loss": 0.05062909051775932, + "loss_ce": 0.005371520761400461, + "loss_iou": 0.2177734375, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 886196328, + "step": 9140 + }, + { + "epoch": 0.893723113023074, + "grad_norm": 15.19316124117823, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 886293644, + "step": 9141 + }, + { + "epoch": 0.893723113023074, + "loss": 0.07932624220848083, + "loss_ce": 0.007518385071307421, + "loss_iou": 0.310546875, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 886293644, + "step": 9141 + }, + { + "epoch": 0.8938208838482596, + "grad_norm": 4.240207563093565, + "learning_rate": 5e-05, + "loss": 0.0461, + "num_input_tokens_seen": 886389048, + "step": 9142 + }, + { + "epoch": 0.8938208838482596, + "loss": 0.03710809350013733, + "loss_ce": 0.0037104212678968906, + "loss_iou": 0.14453125, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 886389048, + "step": 9142 + }, + { + "epoch": 0.8939186546734454, + "grad_norm": 6.084883578663931, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 886486092, + "step": 9143 + }, + { + "epoch": 0.8939186546734454, + "loss": 0.08055657893419266, + "loss_ce": 0.009450623765587807, + "loss_iou": 0.19921875, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 886486092, + "step": 9143 + }, + { + "epoch": 0.8940164254986313, + "grad_norm": 7.03897140880883, + "learning_rate": 5e-05, + "loss": 0.0533, + "num_input_tokens_seen": 886582772, + "step": 9144 + }, + { + "epoch": 0.8940164254986313, + "loss": 0.06019795686006546, + "loss_ce": 0.004778037313371897, + "loss_iou": 0.314453125, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 886582772, + "step": 9144 + }, + { + "epoch": 0.894114196323817, + "grad_norm": 3.830628141179246, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 886679652, + "step": 9145 + }, + { + "epoch": 0.894114196323817, + "loss": 0.05135808885097504, + "loss_ce": 0.0029419539496302605, + "loss_iou": 0.267578125, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 886679652, + "step": 9145 + }, + { + "epoch": 0.8942119671490028, + "grad_norm": 4.901483224071172, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 886777100, + "step": 9146 + }, + { + "epoch": 0.8942119671490028, + "loss": 0.05837153643369675, + "loss_ce": 0.0019750529900193214, + "loss_iou": 0.2265625, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 886777100, + "step": 9146 + }, + { + "epoch": 0.8943097379741886, + "grad_norm": 4.20620555963869, + "learning_rate": 5e-05, + "loss": 0.0438, + "num_input_tokens_seen": 886873932, + "step": 9147 + }, + { + "epoch": 0.8943097379741886, + "loss": 0.02406400442123413, + "loss_ce": 0.0008401257218793035, + "loss_iou": 0.298828125, + "loss_num": 0.004638671875, + "loss_xval": 0.023193359375, + "num_input_tokens_seen": 886873932, + "step": 9147 + }, + { + "epoch": 0.8944075087993743, + "grad_norm": 7.790292282646398, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 886971028, + "step": 9148 + }, + { + "epoch": 0.8944075087993743, + "loss": 0.10012856125831604, + "loss_ce": 0.004578018561005592, + "loss_iou": 0.2451171875, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 886971028, + "step": 9148 + }, + { + "epoch": 0.8945052796245601, + "grad_norm": 11.858622912751134, + "learning_rate": 5e-05, + "loss": 0.1064, + "num_input_tokens_seen": 887068280, + "step": 9149 + }, + { + "epoch": 0.8945052796245601, + "loss": 0.12661303579807281, + "loss_ce": 0.006007571704685688, + "loss_iou": 0.2890625, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 887068280, + "step": 9149 + }, + { + "epoch": 0.8946030504497458, + "grad_norm": 7.558239436154362, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 887164536, + "step": 9150 + }, + { + "epoch": 0.8946030504497458, + "loss": 0.07084642350673676, + "loss_ce": 0.004592760466039181, + "loss_iou": 0.373046875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 887164536, + "step": 9150 + }, + { + "epoch": 0.8947008212749316, + "grad_norm": 7.470697262647587, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 887262616, + "step": 9151 + }, + { + "epoch": 0.8947008212749316, + "loss": 0.05481073260307312, + "loss_ce": 0.003556462936103344, + "loss_iou": 0.40234375, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 887262616, + "step": 9151 + }, + { + "epoch": 0.8947985921001174, + "grad_norm": 20.030715853254087, + "learning_rate": 5e-05, + "loss": 0.0621, + "num_input_tokens_seen": 887359840, + "step": 9152 + }, + { + "epoch": 0.8947985921001174, + "loss": 0.06739163398742676, + "loss_ce": 0.004891631193459034, + "loss_iou": 0.318359375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 887359840, + "step": 9152 + }, + { + "epoch": 0.8948963629253031, + "grad_norm": 14.0018681748933, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 887456244, + "step": 9153 + }, + { + "epoch": 0.8948963629253031, + "loss": 0.0794997289776802, + "loss_ce": 0.0044875191524624825, + "loss_iou": 0.287109375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 887456244, + "step": 9153 + }, + { + "epoch": 0.8949941337504889, + "grad_norm": 5.8817449936268495, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 887553484, + "step": 9154 + }, + { + "epoch": 0.8949941337504889, + "loss": 0.09185324609279633, + "loss_ce": 0.008448712527751923, + "loss_iou": 0.2578125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 887553484, + "step": 9154 + }, + { + "epoch": 0.8950919045756747, + "grad_norm": 17.575982001926555, + "learning_rate": 5e-05, + "loss": 0.1048, + "num_input_tokens_seen": 887650132, + "step": 9155 + }, + { + "epoch": 0.8950919045756747, + "loss": 0.12789136171340942, + "loss_ce": 0.007346926257014275, + "loss_iou": 0.294921875, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 887650132, + "step": 9155 + }, + { + "epoch": 0.8951896754008604, + "grad_norm": 34.96709535205488, + "learning_rate": 5e-05, + "loss": 0.0405, + "num_input_tokens_seen": 887746448, + "step": 9156 + }, + { + "epoch": 0.8951896754008604, + "loss": 0.04132683575153351, + "loss_ce": 0.006506277713924646, + "loss_iou": 0.1884765625, + "loss_num": 0.0069580078125, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 887746448, + "step": 9156 + }, + { + "epoch": 0.8952874462260462, + "grad_norm": 4.753951612270715, + "learning_rate": 5e-05, + "loss": 0.1072, + "num_input_tokens_seen": 887843472, + "step": 9157 + }, + { + "epoch": 0.8952874462260462, + "loss": 0.09563745558261871, + "loss_ce": 0.00474084448069334, + "loss_iou": 0.271484375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 887843472, + "step": 9157 + }, + { + "epoch": 0.8953852170512319, + "grad_norm": 7.139068383282046, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 887940776, + "step": 9158 + }, + { + "epoch": 0.8953852170512319, + "loss": 0.04887986183166504, + "loss_ce": 0.007848979905247688, + "loss_iou": 0.298828125, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 887940776, + "step": 9158 + }, + { + "epoch": 0.8954829878764177, + "grad_norm": 18.88806649493253, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 888038448, + "step": 9159 + }, + { + "epoch": 0.8954829878764177, + "loss": 0.11731716990470886, + "loss_ce": 0.0035266545601189137, + "loss_iou": 0.40625, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 888038448, + "step": 9159 + }, + { + "epoch": 0.8955807587016035, + "grad_norm": 13.358406771135064, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 888134752, + "step": 9160 + }, + { + "epoch": 0.8955807587016035, + "loss": 0.06060295179486275, + "loss_ce": 0.0034893029369413853, + "loss_iou": 0.328125, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 888134752, + "step": 9160 + }, + { + "epoch": 0.8956785295267892, + "grad_norm": 5.615358745143525, + "learning_rate": 5e-05, + "loss": 0.0662, + "num_input_tokens_seen": 888231396, + "step": 9161 + }, + { + "epoch": 0.8956785295267892, + "loss": 0.05351415276527405, + "loss_ce": 0.006623893044888973, + "loss_iou": 0.326171875, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 888231396, + "step": 9161 + }, + { + "epoch": 0.895776300351975, + "grad_norm": 5.4290204855159425, + "learning_rate": 5e-05, + "loss": 0.0601, + "num_input_tokens_seen": 888328480, + "step": 9162 + }, + { + "epoch": 0.895776300351975, + "loss": 0.07676565647125244, + "loss_ce": 0.00874197669327259, + "loss_iou": 0.2421875, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 888328480, + "step": 9162 + }, + { + "epoch": 0.8958740711771608, + "grad_norm": 4.120877656507921, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 888424316, + "step": 9163 + }, + { + "epoch": 0.8958740711771608, + "loss": 0.10999862849712372, + "loss_ce": 0.008603967726230621, + "loss_iou": 0.21484375, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 888424316, + "step": 9163 + }, + { + "epoch": 0.8959718420023465, + "grad_norm": 4.711034819437938, + "learning_rate": 5e-05, + "loss": 0.0524, + "num_input_tokens_seen": 888520940, + "step": 9164 + }, + { + "epoch": 0.8959718420023465, + "loss": 0.03042970597743988, + "loss_ce": 0.004825457464903593, + "loss_iou": 0.2578125, + "loss_num": 0.005126953125, + "loss_xval": 0.025634765625, + "num_input_tokens_seen": 888520940, + "step": 9164 + }, + { + "epoch": 0.8960696128275323, + "grad_norm": 4.841600778378025, + "learning_rate": 5e-05, + "loss": 0.1514, + "num_input_tokens_seen": 888617400, + "step": 9165 + }, + { + "epoch": 0.8960696128275323, + "loss": 0.11206933856010437, + "loss_ce": 0.006280149333178997, + "loss_iou": 0.28515625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 888617400, + "step": 9165 + }, + { + "epoch": 0.896167383652718, + "grad_norm": 5.3746794018131565, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 888715036, + "step": 9166 + }, + { + "epoch": 0.896167383652718, + "loss": 0.05447039008140564, + "loss_ce": 0.007137627340853214, + "loss_iou": 0.4453125, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 888715036, + "step": 9166 + }, + { + "epoch": 0.8962651544779038, + "grad_norm": 11.657388210276425, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 888811932, + "step": 9167 + }, + { + "epoch": 0.8962651544779038, + "loss": 0.10458284616470337, + "loss_ce": 0.011260099709033966, + "loss_iou": 0.236328125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 888811932, + "step": 9167 + }, + { + "epoch": 0.8963629253030896, + "grad_norm": 7.156283897706551, + "learning_rate": 5e-05, + "loss": 0.1025, + "num_input_tokens_seen": 888908284, + "step": 9168 + }, + { + "epoch": 0.8963629253030896, + "loss": 0.1068679541349411, + "loss_ce": 0.008212248794734478, + "loss_iou": 0.2373046875, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 888908284, + "step": 9168 + }, + { + "epoch": 0.8964606961282753, + "grad_norm": 7.989864204622554, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 889004476, + "step": 9169 + }, + { + "epoch": 0.8964606961282753, + "loss": 0.08217697590589523, + "loss_ce": 0.0025566150434315205, + "loss_iou": 0.298828125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 889004476, + "step": 9169 + }, + { + "epoch": 0.8965584669534611, + "grad_norm": 12.232587413606455, + "learning_rate": 5e-05, + "loss": 0.1136, + "num_input_tokens_seen": 889101108, + "step": 9170 + }, + { + "epoch": 0.8965584669534611, + "loss": 0.06823477149009705, + "loss_ce": 0.0024426868185400963, + "loss_iou": 0.33984375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 889101108, + "step": 9170 + }, + { + "epoch": 0.8966562377786469, + "grad_norm": 10.36262606675682, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 889197500, + "step": 9171 + }, + { + "epoch": 0.8966562377786469, + "loss": 0.07047977298498154, + "loss_ce": 0.006049534305930138, + "loss_iou": 0.349609375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 889197500, + "step": 9171 + }, + { + "epoch": 0.8967540086038326, + "grad_norm": 13.503304018419875, + "learning_rate": 5e-05, + "loss": 0.0877, + "num_input_tokens_seen": 889294768, + "step": 9172 + }, + { + "epoch": 0.8967540086038326, + "loss": 0.09423346072435379, + "loss_ce": 0.0023145191371440887, + "loss_iou": 0.337890625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 889294768, + "step": 9172 + }, + { + "epoch": 0.8968517794290184, + "grad_norm": 19.50584959702555, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 889392200, + "step": 9173 + }, + { + "epoch": 0.8968517794290184, + "loss": 0.0911586731672287, + "loss_ce": 0.003939438611268997, + "loss_iou": 0.349609375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 889392200, + "step": 9173 + }, + { + "epoch": 0.8969495502542042, + "grad_norm": 21.056431533381623, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 889489012, + "step": 9174 + }, + { + "epoch": 0.8969495502542042, + "loss": 0.07504120469093323, + "loss_ce": 0.0036071804352104664, + "loss_iou": 0.271484375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 889489012, + "step": 9174 + }, + { + "epoch": 0.8970473210793899, + "grad_norm": 6.702579099415692, + "learning_rate": 5e-05, + "loss": 0.1201, + "num_input_tokens_seen": 889585620, + "step": 9175 + }, + { + "epoch": 0.8970473210793899, + "loss": 0.11987274885177612, + "loss_ce": 0.008910829201340675, + "loss_iou": 0.271484375, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 889585620, + "step": 9175 + }, + { + "epoch": 0.8971450919045757, + "grad_norm": 4.568358630228646, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 889682972, + "step": 9176 + }, + { + "epoch": 0.8971450919045757, + "loss": 0.11244818568229675, + "loss_ce": 0.02052924782037735, + "loss_iou": 0.30078125, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 889682972, + "step": 9176 + }, + { + "epoch": 0.8972428627297614, + "grad_norm": 5.3597590929096715, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 889778972, + "step": 9177 + }, + { + "epoch": 0.8972428627297614, + "loss": 0.10017597675323486, + "loss_ce": 0.009180188179016113, + "loss_iou": 0.146484375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 889778972, + "step": 9177 + }, + { + "epoch": 0.8973406335549472, + "grad_norm": 4.588831910943365, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 889876284, + "step": 9178 + }, + { + "epoch": 0.8973406335549472, + "loss": 0.08827929943799973, + "loss_ce": 0.004462771117687225, + "loss_iou": 0.322265625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 889876284, + "step": 9178 + }, + { + "epoch": 0.897438404380133, + "grad_norm": 132.98553211014536, + "learning_rate": 5e-05, + "loss": 0.1122, + "num_input_tokens_seen": 889973628, + "step": 9179 + }, + { + "epoch": 0.897438404380133, + "loss": 0.09683620929718018, + "loss_ce": 0.006549948826432228, + "loss_iou": 0.302734375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 889973628, + "step": 9179 + }, + { + "epoch": 0.8975361752053187, + "grad_norm": 46.95042841840253, + "learning_rate": 5e-05, + "loss": 0.0919, + "num_input_tokens_seen": 890070908, + "step": 9180 + }, + { + "epoch": 0.8975361752053187, + "loss": 0.09919576346874237, + "loss_ce": 0.007032674737274647, + "loss_iou": 0.310546875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 890070908, + "step": 9180 + }, + { + "epoch": 0.8976339460305045, + "grad_norm": 2.753870826260565, + "learning_rate": 5e-05, + "loss": 0.073, + "num_input_tokens_seen": 890168224, + "step": 9181 + }, + { + "epoch": 0.8976339460305045, + "loss": 0.09538737684488297, + "loss_ce": 0.00750438217073679, + "loss_iou": 0.2890625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 890168224, + "step": 9181 + }, + { + "epoch": 0.8977317168556903, + "grad_norm": 13.728207005975923, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 890265276, + "step": 9182 + }, + { + "epoch": 0.8977317168556903, + "loss": 0.053363338112831116, + "loss_ce": 0.0028758191037923098, + "loss_iou": 0.28515625, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 890265276, + "step": 9182 + }, + { + "epoch": 0.897829487680876, + "grad_norm": 5.2176165596269355, + "learning_rate": 5e-05, + "loss": 0.1039, + "num_input_tokens_seen": 890360912, + "step": 9183 + }, + { + "epoch": 0.897829487680876, + "loss": 0.09420377016067505, + "loss_ce": 0.005153476260602474, + "loss_iou": 0.2578125, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 890360912, + "step": 9183 + }, + { + "epoch": 0.8979272585060618, + "grad_norm": 13.837021361797289, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 890456984, + "step": 9184 + }, + { + "epoch": 0.8979272585060618, + "loss": 0.07279974222183228, + "loss_ce": 0.00685887923464179, + "loss_iou": 0.265625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 890456984, + "step": 9184 + }, + { + "epoch": 0.8980250293312475, + "grad_norm": 5.721436964661178, + "learning_rate": 5e-05, + "loss": 0.0483, + "num_input_tokens_seen": 890554980, + "step": 9185 + }, + { + "epoch": 0.8980250293312475, + "loss": 0.06254338473081589, + "loss_ce": 0.0033850590698421, + "loss_iou": 0.306640625, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 890554980, + "step": 9185 + }, + { + "epoch": 0.8981228001564333, + "grad_norm": 3.8265913820098914, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 890651688, + "step": 9186 + }, + { + "epoch": 0.8981228001564333, + "loss": 0.07550347596406937, + "loss_ce": 0.00809777807444334, + "loss_iou": 0.2158203125, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 890651688, + "step": 9186 + }, + { + "epoch": 0.8982205709816191, + "grad_norm": 3.1448364391063652, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 890748988, + "step": 9187 + }, + { + "epoch": 0.8982205709816191, + "loss": 0.07225307077169418, + "loss_ce": 0.004031026270240545, + "loss_iou": 0.337890625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 890748988, + "step": 9187 + }, + { + "epoch": 0.8983183418068048, + "grad_norm": 28.421491332456686, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 890845596, + "step": 9188 + }, + { + "epoch": 0.8983183418068048, + "loss": 0.08187466859817505, + "loss_ce": 0.00252897129394114, + "loss_iou": 0.2314453125, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 890845596, + "step": 9188 + }, + { + "epoch": 0.8984161126319906, + "grad_norm": 9.396478190311651, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 890941704, + "step": 9189 + }, + { + "epoch": 0.8984161126319906, + "loss": 0.05550595000386238, + "loss_ce": 0.003195005701854825, + "loss_iou": 0.259765625, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 890941704, + "step": 9189 + }, + { + "epoch": 0.8985138834571764, + "grad_norm": 11.753171737281885, + "learning_rate": 5e-05, + "loss": 0.0576, + "num_input_tokens_seen": 891038272, + "step": 9190 + }, + { + "epoch": 0.8985138834571764, + "loss": 0.06389924883842468, + "loss_ce": 0.003184529021382332, + "loss_iou": 0.296875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 891038272, + "step": 9190 + }, + { + "epoch": 0.8986116542823621, + "grad_norm": 21.3278611508731, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 891135276, + "step": 9191 + }, + { + "epoch": 0.8986116542823621, + "loss": 0.10041938722133636, + "loss_ce": 0.0037854770198464394, + "loss_iou": 0.3125, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 891135276, + "step": 9191 + }, + { + "epoch": 0.8987094251075479, + "grad_norm": 9.454289675548942, + "learning_rate": 5e-05, + "loss": 0.0862, + "num_input_tokens_seen": 891232664, + "step": 9192 + }, + { + "epoch": 0.8987094251075479, + "loss": 0.08900322020053864, + "loss_ce": 0.00324882660061121, + "loss_iou": 0.357421875, + "loss_num": 0.01708984375, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 891232664, + "step": 9192 + }, + { + "epoch": 0.8988071959327337, + "grad_norm": 6.654841454192793, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 891331156, + "step": 9193 + }, + { + "epoch": 0.8988071959327337, + "loss": 0.13159926235675812, + "loss_ce": 0.012275531888008118, + "loss_iou": 0.330078125, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 891331156, + "step": 9193 + }, + { + "epoch": 0.8989049667579194, + "grad_norm": 8.978188482109267, + "learning_rate": 5e-05, + "loss": 0.1087, + "num_input_tokens_seen": 891427912, + "step": 9194 + }, + { + "epoch": 0.8989049667579194, + "loss": 0.14902979135513306, + "loss_ce": 0.01769358478486538, + "loss_iou": 0.271484375, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 891427912, + "step": 9194 + }, + { + "epoch": 0.8990027375831052, + "grad_norm": 10.019189758188032, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 891525492, + "step": 9195 + }, + { + "epoch": 0.8990027375831052, + "loss": 0.1002744734287262, + "loss_ce": 0.007752811536192894, + "loss_iou": 0.298828125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 891525492, + "step": 9195 + }, + { + "epoch": 0.8991005084082909, + "grad_norm": 7.365577761262603, + "learning_rate": 5e-05, + "loss": 0.049, + "num_input_tokens_seen": 891621936, + "step": 9196 + }, + { + "epoch": 0.8991005084082909, + "loss": 0.059525247663259506, + "loss_ce": 0.004669899120926857, + "loss_iou": 0.369140625, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 891621936, + "step": 9196 + }, + { + "epoch": 0.8991982792334767, + "grad_norm": 3.3193082863681806, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 891718752, + "step": 9197 + }, + { + "epoch": 0.8991982792334767, + "loss": 0.08074123412370682, + "loss_ce": 0.003943746909499168, + "loss_iou": 0.263671875, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 891718752, + "step": 9197 + }, + { + "epoch": 0.8992960500586625, + "grad_norm": 3.646538005462283, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 891815692, + "step": 9198 + }, + { + "epoch": 0.8992960500586625, + "loss": 0.036242153495550156, + "loss_ce": 0.0023676413111388683, + "loss_iou": 0.2890625, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 891815692, + "step": 9198 + }, + { + "epoch": 0.8993938208838482, + "grad_norm": 12.80242591134985, + "learning_rate": 5e-05, + "loss": 0.0691, + "num_input_tokens_seen": 891912316, + "step": 9199 + }, + { + "epoch": 0.8993938208838482, + "loss": 0.05263693258166313, + "loss_ce": 0.005693268496543169, + "loss_iou": 0.302734375, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 891912316, + "step": 9199 + }, + { + "epoch": 0.899491591709034, + "grad_norm": 12.954044343502886, + "learning_rate": 5e-05, + "loss": 0.0914, + "num_input_tokens_seen": 892009024, + "step": 9200 + }, + { + "epoch": 0.899491591709034, + "loss": 0.047084175050258636, + "loss_ce": 0.0034745540469884872, + "loss_iou": 0.28515625, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 892009024, + "step": 9200 + }, + { + "epoch": 0.8995893625342198, + "grad_norm": 4.201362003438596, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 892106148, + "step": 9201 + }, + { + "epoch": 0.8995893625342198, + "loss": 0.07421795278787613, + "loss_ce": 0.0030509610660374165, + "loss_iou": 0.345703125, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 892106148, + "step": 9201 + }, + { + "epoch": 0.8996871333594055, + "grad_norm": 3.4731843096290183, + "learning_rate": 5e-05, + "loss": 0.0548, + "num_input_tokens_seen": 892203152, + "step": 9202 + }, + { + "epoch": 0.8996871333594055, + "loss": 0.06410267949104309, + "loss_ce": 0.001663717906922102, + "loss_iou": 0.27734375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 892203152, + "step": 9202 + }, + { + "epoch": 0.8997849041845913, + "grad_norm": 7.47523778593018, + "learning_rate": 5e-05, + "loss": 0.0763, + "num_input_tokens_seen": 892300648, + "step": 9203 + }, + { + "epoch": 0.8997849041845913, + "loss": 0.06686204671859741, + "loss_ce": 0.0030345283448696136, + "loss_iou": 0.326171875, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 892300648, + "step": 9203 + }, + { + "epoch": 0.899882675009777, + "grad_norm": 4.2630419449070045, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 892397204, + "step": 9204 + }, + { + "epoch": 0.899882675009777, + "loss": 0.10731212794780731, + "loss_ce": 0.0053986795246601105, + "loss_iou": 0.25390625, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 892397204, + "step": 9204 + }, + { + "epoch": 0.8999804458349628, + "grad_norm": 5.160616219299363, + "learning_rate": 5e-05, + "loss": 0.0819, + "num_input_tokens_seen": 892495232, + "step": 9205 + }, + { + "epoch": 0.8999804458349628, + "loss": 0.09433519840240479, + "loss_ce": 0.005529045592993498, + "loss_iou": 0.28125, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 892495232, + "step": 9205 + }, + { + "epoch": 0.9000782166601486, + "grad_norm": 10.630492883853242, + "learning_rate": 5e-05, + "loss": 0.0997, + "num_input_tokens_seen": 892592496, + "step": 9206 + }, + { + "epoch": 0.9000782166601486, + "loss": 0.10735461115837097, + "loss_ce": 0.002099489327520132, + "loss_iou": 0.310546875, + "loss_num": 0.02099609375, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 892592496, + "step": 9206 + }, + { + "epoch": 0.9001759874853343, + "grad_norm": 9.453774534687078, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 892689000, + "step": 9207 + }, + { + "epoch": 0.9001759874853343, + "loss": 0.07239271700382233, + "loss_ce": 0.0054409680888056755, + "loss_iou": 0.1962890625, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 892689000, + "step": 9207 + }, + { + "epoch": 0.9002737583105201, + "grad_norm": 3.702201667847796, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 892785344, + "step": 9208 + }, + { + "epoch": 0.9002737583105201, + "loss": 0.08081439137458801, + "loss_ce": 0.0016517913900315762, + "loss_iou": 0.3046875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 892785344, + "step": 9208 + }, + { + "epoch": 0.900371529135706, + "grad_norm": 6.367014871776797, + "learning_rate": 5e-05, + "loss": 0.0961, + "num_input_tokens_seen": 892882276, + "step": 9209 + }, + { + "epoch": 0.900371529135706, + "loss": 0.09142453968524933, + "loss_ce": 0.004968245513737202, + "loss_iou": 0.296875, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 892882276, + "step": 9209 + }, + { + "epoch": 0.9004692999608916, + "grad_norm": 34.95581907969382, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 892978480, + "step": 9210 + }, + { + "epoch": 0.9004692999608916, + "loss": 0.06642791628837585, + "loss_ce": 0.0029666107147932053, + "loss_iou": 0.369140625, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 892978480, + "step": 9210 + }, + { + "epoch": 0.9005670707860775, + "grad_norm": 9.291492915216214, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 893074540, + "step": 9211 + }, + { + "epoch": 0.9005670707860775, + "loss": 0.06868207454681396, + "loss_ce": 0.006403332110494375, + "loss_iou": 0.201171875, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 893074540, + "step": 9211 + }, + { + "epoch": 0.9006648416112631, + "grad_norm": 4.297941530786972, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 893171196, + "step": 9212 + }, + { + "epoch": 0.9006648416112631, + "loss": 0.08811979740858078, + "loss_ce": 0.005142497830092907, + "loss_iou": 0.2578125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 893171196, + "step": 9212 + }, + { + "epoch": 0.900762612436449, + "grad_norm": 8.291825676358057, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 893268848, + "step": 9213 + }, + { + "epoch": 0.900762612436449, + "loss": 0.08882015943527222, + "loss_ce": 0.005049398168921471, + "loss_iou": 0.2578125, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 893268848, + "step": 9213 + }, + { + "epoch": 0.9008603832616348, + "grad_norm": 4.626718603134612, + "learning_rate": 5e-05, + "loss": 0.073, + "num_input_tokens_seen": 893366072, + "step": 9214 + }, + { + "epoch": 0.9008603832616348, + "loss": 0.08475729078054428, + "loss_ce": 0.005259000696241856, + "loss_iou": 0.333984375, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 893366072, + "step": 9214 + }, + { + "epoch": 0.9009581540868205, + "grad_norm": 2.6774336818929623, + "learning_rate": 5e-05, + "loss": 0.0597, + "num_input_tokens_seen": 893463328, + "step": 9215 + }, + { + "epoch": 0.9009581540868205, + "loss": 0.04288502410054207, + "loss_ce": 0.005455215461552143, + "loss_iou": 0.32421875, + "loss_num": 0.007476806640625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 893463328, + "step": 9215 + }, + { + "epoch": 0.9010559249120063, + "grad_norm": 3.6802169965689226, + "learning_rate": 5e-05, + "loss": 0.0477, + "num_input_tokens_seen": 893560280, + "step": 9216 + }, + { + "epoch": 0.9010559249120063, + "loss": 0.06358037889003754, + "loss_ce": 0.004277095198631287, + "loss_iou": 0.169921875, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 893560280, + "step": 9216 + }, + { + "epoch": 0.9011536957371921, + "grad_norm": 3.794326223788227, + "learning_rate": 5e-05, + "loss": 0.0615, + "num_input_tokens_seen": 893657384, + "step": 9217 + }, + { + "epoch": 0.9011536957371921, + "loss": 0.07040229439735413, + "loss_ce": 0.002767715137451887, + "loss_iou": 0.306640625, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 893657384, + "step": 9217 + }, + { + "epoch": 0.9012514665623778, + "grad_norm": 37.33714338426917, + "learning_rate": 5e-05, + "loss": 0.0916, + "num_input_tokens_seen": 893754392, + "step": 9218 + }, + { + "epoch": 0.9012514665623778, + "loss": 0.11088414490222931, + "loss_ce": 0.007643182761967182, + "loss_iou": 0.259765625, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 893754392, + "step": 9218 + }, + { + "epoch": 0.9013492373875636, + "grad_norm": 12.112301313162112, + "learning_rate": 5e-05, + "loss": 0.077, + "num_input_tokens_seen": 893850132, + "step": 9219 + }, + { + "epoch": 0.9013492373875636, + "loss": 0.05472833663225174, + "loss_ce": 0.0046566203236579895, + "loss_iou": 0.255859375, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 893850132, + "step": 9219 + }, + { + "epoch": 0.9014470082127494, + "grad_norm": 8.1213878189845, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 893947356, + "step": 9220 + }, + { + "epoch": 0.9014470082127494, + "loss": 0.07371948659420013, + "loss_ce": 0.003269654931500554, + "loss_iou": 0.263671875, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 893947356, + "step": 9220 + }, + { + "epoch": 0.9015447790379351, + "grad_norm": 3.994840078555015, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 894044040, + "step": 9221 + }, + { + "epoch": 0.9015447790379351, + "loss": 0.0639076977968216, + "loss_ce": 0.005374980624765158, + "loss_iou": 0.201171875, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 894044040, + "step": 9221 + }, + { + "epoch": 0.9016425498631209, + "grad_norm": 2.2427571514902653, + "learning_rate": 5e-05, + "loss": 0.0656, + "num_input_tokens_seen": 894140408, + "step": 9222 + }, + { + "epoch": 0.9016425498631209, + "loss": 0.048862867057323456, + "loss_ce": 0.00820200890302658, + "loss_iou": 0.23046875, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 894140408, + "step": 9222 + }, + { + "epoch": 0.9017403206883066, + "grad_norm": 16.457190264017193, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 894238028, + "step": 9223 + }, + { + "epoch": 0.9017403206883066, + "loss": 0.10389155894517899, + "loss_ce": 0.01637858711183071, + "loss_iou": 0.216796875, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 894238028, + "step": 9223 + }, + { + "epoch": 0.9018380915134924, + "grad_norm": 6.696909648504793, + "learning_rate": 5e-05, + "loss": 0.0981, + "num_input_tokens_seen": 894334776, + "step": 9224 + }, + { + "epoch": 0.9018380915134924, + "loss": 0.1194799542427063, + "loss_ce": 0.006580176297575235, + "loss_iou": 0.291015625, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 894334776, + "step": 9224 + }, + { + "epoch": 0.9019358623386782, + "grad_norm": 8.720367455172433, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 894431032, + "step": 9225 + }, + { + "epoch": 0.9019358623386782, + "loss": 0.07061851024627686, + "loss_ce": 0.00353324506431818, + "loss_iou": 0.296875, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 894431032, + "step": 9225 + }, + { + "epoch": 0.9020336331638639, + "grad_norm": 5.355397008959922, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 894528164, + "step": 9226 + }, + { + "epoch": 0.9020336331638639, + "loss": 0.057553358376026154, + "loss_ce": 0.002774308668449521, + "loss_iou": 0.29296875, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 894528164, + "step": 9226 + }, + { + "epoch": 0.9021314039890497, + "grad_norm": 6.357238474345283, + "learning_rate": 5e-05, + "loss": 0.078, + "num_input_tokens_seen": 894624624, + "step": 9227 + }, + { + "epoch": 0.9021314039890497, + "loss": 0.08562970906496048, + "loss_ce": 0.0036900073755532503, + "loss_iou": 0.20703125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 894624624, + "step": 9227 + }, + { + "epoch": 0.9022291748142355, + "grad_norm": 2.704756735367139, + "learning_rate": 5e-05, + "loss": 0.0605, + "num_input_tokens_seen": 894720884, + "step": 9228 + }, + { + "epoch": 0.9022291748142355, + "loss": 0.06754297018051147, + "loss_ce": 0.006385744549334049, + "loss_iou": 0.2490234375, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 894720884, + "step": 9228 + }, + { + "epoch": 0.9023269456394212, + "grad_norm": 9.528234690996326, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 894817632, + "step": 9229 + }, + { + "epoch": 0.9023269456394212, + "loss": 0.07237430661916733, + "loss_ce": 0.006563149858266115, + "loss_iou": 0.369140625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 894817632, + "step": 9229 + }, + { + "epoch": 0.902424716464607, + "grad_norm": 15.931695425169393, + "learning_rate": 5e-05, + "loss": 0.097, + "num_input_tokens_seen": 894915116, + "step": 9230 + }, + { + "epoch": 0.902424716464607, + "loss": 0.11518921703100204, + "loss_ce": 0.0066000414080917835, + "loss_iou": 0.31640625, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 894915116, + "step": 9230 + }, + { + "epoch": 0.9025224872897927, + "grad_norm": 12.077134059634224, + "learning_rate": 5e-05, + "loss": 0.0743, + "num_input_tokens_seen": 895012376, + "step": 9231 + }, + { + "epoch": 0.9025224872897927, + "loss": 0.07510560005903244, + "loss_ce": 0.004709172062575817, + "loss_iou": 0.2470703125, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 895012376, + "step": 9231 + }, + { + "epoch": 0.9026202581149785, + "grad_norm": 3.28337374207022, + "learning_rate": 5e-05, + "loss": 0.0585, + "num_input_tokens_seen": 895108472, + "step": 9232 + }, + { + "epoch": 0.9026202581149785, + "loss": 0.05429565906524658, + "loss_ce": 0.005284429993480444, + "loss_iou": 0.3515625, + "loss_num": 0.009765625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 895108472, + "step": 9232 + }, + { + "epoch": 0.9027180289401643, + "grad_norm": 17.03389389629997, + "learning_rate": 5e-05, + "loss": 0.0644, + "num_input_tokens_seen": 895205756, + "step": 9233 + }, + { + "epoch": 0.9027180289401643, + "loss": 0.08010140061378479, + "loss_ce": 0.005897908471524715, + "loss_iou": 0.1953125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 895205756, + "step": 9233 + }, + { + "epoch": 0.90281579976535, + "grad_norm": 6.954569145918531, + "learning_rate": 5e-05, + "loss": 0.052, + "num_input_tokens_seen": 895303164, + "step": 9234 + }, + { + "epoch": 0.90281579976535, + "loss": 0.061412230134010315, + "loss_ce": 0.0023912345059216022, + "loss_iou": 0.34765625, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 895303164, + "step": 9234 + }, + { + "epoch": 0.9029135705905358, + "grad_norm": 15.284638262033752, + "learning_rate": 5e-05, + "loss": 0.0848, + "num_input_tokens_seen": 895400740, + "step": 9235 + }, + { + "epoch": 0.9029135705905358, + "loss": 0.08567525446414948, + "loss_ce": 0.005871783941984177, + "loss_iou": 0.34375, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 895400740, + "step": 9235 + }, + { + "epoch": 0.9030113414157216, + "grad_norm": 6.986725080506583, + "learning_rate": 5e-05, + "loss": 0.0612, + "num_input_tokens_seen": 895497472, + "step": 9236 + }, + { + "epoch": 0.9030113414157216, + "loss": 0.05329291522502899, + "loss_ce": 0.005395576357841492, + "loss_iou": 0.25390625, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 895497472, + "step": 9236 + }, + { + "epoch": 0.9031091122409073, + "grad_norm": 3.336058532374571, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 895594424, + "step": 9237 + }, + { + "epoch": 0.9031091122409073, + "loss": 0.0955602377653122, + "loss_ce": 0.004419493954628706, + "loss_iou": 0.2197265625, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 895594424, + "step": 9237 + }, + { + "epoch": 0.9032068830660931, + "grad_norm": 4.170335724153668, + "learning_rate": 5e-05, + "loss": 0.0578, + "num_input_tokens_seen": 895692060, + "step": 9238 + }, + { + "epoch": 0.9032068830660931, + "loss": 0.0436311699450016, + "loss_ce": 0.0054536801762878895, + "loss_iou": 0.26171875, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 895692060, + "step": 9238 + }, + { + "epoch": 0.9033046538912789, + "grad_norm": 5.172448084091364, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 895788232, + "step": 9239 + }, + { + "epoch": 0.9033046538912789, + "loss": 0.09045366942882538, + "loss_ce": 0.011535217985510826, + "loss_iou": 0.302734375, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 895788232, + "step": 9239 + }, + { + "epoch": 0.9034024247164646, + "grad_norm": 4.49645311298812, + "learning_rate": 5e-05, + "loss": 0.0442, + "num_input_tokens_seen": 895884196, + "step": 9240 + }, + { + "epoch": 0.9034024247164646, + "loss": 0.03931484371423721, + "loss_ce": 0.005333524197340012, + "loss_iou": 0.1796875, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 895884196, + "step": 9240 + }, + { + "epoch": 0.9035001955416504, + "grad_norm": 9.910012648586674, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 895980940, + "step": 9241 + }, + { + "epoch": 0.9035001955416504, + "loss": 0.043678708374500275, + "loss_ce": 0.0030750720761716366, + "loss_iou": 0.25390625, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 895980940, + "step": 9241 + }, + { + "epoch": 0.9035979663668361, + "grad_norm": 2.487294802967097, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 896077796, + "step": 9242 + }, + { + "epoch": 0.9035979663668361, + "loss": 0.07671566307544708, + "loss_ce": 0.0038549485616385937, + "loss_iou": 0.34765625, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 896077796, + "step": 9242 + }, + { + "epoch": 0.9036957371920219, + "grad_norm": 2.3710101127426935, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 896175176, + "step": 9243 + }, + { + "epoch": 0.9036957371920219, + "loss": 0.04889927804470062, + "loss_ce": 0.0055185407400131226, + "loss_iou": 0.28515625, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 896175176, + "step": 9243 + }, + { + "epoch": 0.9037935080172077, + "grad_norm": 8.442228643783226, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 896272200, + "step": 9244 + }, + { + "epoch": 0.9037935080172077, + "loss": 0.07845993340015411, + "loss_ce": 0.002410133834928274, + "loss_iou": 0.388671875, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 896272200, + "step": 9244 + }, + { + "epoch": 0.9038912788423934, + "grad_norm": 25.13128322505203, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 896369872, + "step": 9245 + }, + { + "epoch": 0.9038912788423934, + "loss": 0.09490326792001724, + "loss_ce": 0.008187569677829742, + "loss_iou": 0.36328125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 896369872, + "step": 9245 + }, + { + "epoch": 0.9039890496675792, + "grad_norm": 15.35972768789044, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 896467168, + "step": 9246 + }, + { + "epoch": 0.9039890496675792, + "loss": 0.06794985383749008, + "loss_ce": 0.005274376831948757, + "loss_iou": 0.34375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 896467168, + "step": 9246 + }, + { + "epoch": 0.904086820492765, + "grad_norm": 3.609421578343166, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 896564000, + "step": 9247 + }, + { + "epoch": 0.904086820492765, + "loss": 0.0920783281326294, + "loss_ce": 0.0037909746170043945, + "loss_iou": 0.28125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 896564000, + "step": 9247 + }, + { + "epoch": 0.9041845913179507, + "grad_norm": 2.774402257905735, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 896660564, + "step": 9248 + }, + { + "epoch": 0.9041845913179507, + "loss": 0.08482575416564941, + "loss_ce": 0.004045721143484116, + "loss_iou": 0.28125, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 896660564, + "step": 9248 + }, + { + "epoch": 0.9042823621431365, + "grad_norm": 9.872483444754293, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 896757652, + "step": 9249 + }, + { + "epoch": 0.9042823621431365, + "loss": 0.05725042521953583, + "loss_ce": 0.004645748063921928, + "loss_iou": 0.2490234375, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 896757652, + "step": 9249 + }, + { + "epoch": 0.9043801329683222, + "grad_norm": 7.758809807066138, + "learning_rate": 5e-05, + "loss": 0.0573, + "num_input_tokens_seen": 896854840, + "step": 9250 + }, + { + "epoch": 0.9043801329683222, + "eval_seeclick_CIoU": 0.5453276634216309, + "eval_seeclick_GIoU": 0.5519596636295319, + "eval_seeclick_IoU": 0.5829761922359467, + "eval_seeclick_MAE_all": 0.06477891653776169, + "eval_seeclick_MAE_h": 0.03120448812842369, + "eval_seeclick_MAE_w": 0.09491753205657005, + "eval_seeclick_MAE_x": 0.10363132506608963, + "eval_seeclick_MAE_y": 0.029362349770963192, + "eval_seeclick_NUM_probability": 0.9999989569187164, + "eval_seeclick_inside_bbox": 0.7997159063816071, + "eval_seeclick_loss": 0.24096648395061493, + "eval_seeclick_loss_ce": 0.0096169444732368, + "eval_seeclick_loss_iou": 0.4251708984375, + "eval_seeclick_loss_num": 0.047245025634765625, + "eval_seeclick_loss_xval": 0.236175537109375, + "eval_seeclick_runtime": 77.049, + "eval_seeclick_samples_per_second": 0.558, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 896854840, + "step": 9250 + }, + { + "epoch": 0.9043801329683222, + "eval_icons_CIoU": 0.7024300694465637, + "eval_icons_GIoU": 0.6992669999599457, + "eval_icons_IoU": 0.7309596240520477, + "eval_icons_MAE_all": 0.05236992426216602, + "eval_icons_MAE_h": 0.05480041913688183, + "eval_icons_MAE_w": 0.05022122152149677, + "eval_icons_MAE_x": 0.05087021179497242, + "eval_icons_MAE_y": 0.053587852977216244, + "eval_icons_NUM_probability": 0.9999982714653015, + "eval_icons_inside_bbox": 0.8350694477558136, + "eval_icons_loss": 0.16003970801830292, + "eval_icons_loss_ce": 6.465814408329607e-07, + "eval_icons_loss_iou": 0.367431640625, + "eval_icons_loss_num": 0.034637451171875, + "eval_icons_loss_xval": 0.1731414794921875, + "eval_icons_runtime": 85.5754, + "eval_icons_samples_per_second": 0.584, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 896854840, + "step": 9250 + }, + { + "epoch": 0.9043801329683222, + "eval_screenspot_CIoU": 0.28256476918856305, + "eval_screenspot_GIoU": 0.2817694569627444, + "eval_screenspot_IoU": 0.3859225312868754, + "eval_screenspot_MAE_all": 0.16769633690516153, + "eval_screenspot_MAE_h": 0.10211909313996632, + "eval_screenspot_MAE_w": 0.21919670204321542, + "eval_screenspot_MAE_x": 0.2513306066393852, + "eval_screenspot_MAE_y": 0.09813896318276723, + "eval_screenspot_NUM_probability": 0.9999953707059225, + "eval_screenspot_inside_bbox": 0.6450000007947286, + "eval_screenspot_loss": 0.6101689338684082, + "eval_screenspot_loss_ce": 0.02475586347281933, + "eval_screenspot_loss_iou": 0.3455810546875, + "eval_screenspot_loss_num": 0.118316650390625, + "eval_screenspot_loss_xval": 0.5916341145833334, + "eval_screenspot_runtime": 161.4357, + "eval_screenspot_samples_per_second": 0.551, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 896854840, + "step": 9250 + }, + { + "epoch": 0.9043801329683222, + "eval_compot_CIoU": 0.4759863168001175, + "eval_compot_GIoU": 0.45413345098495483, + "eval_compot_IoU": 0.5418775379657745, + "eval_compot_MAE_all": 0.09721501544117928, + "eval_compot_MAE_h": 0.09321832656860352, + "eval_compot_MAE_w": 0.10405273735523224, + "eval_compot_MAE_x": 0.0952826552093029, + "eval_compot_MAE_y": 0.09630636125802994, + "eval_compot_NUM_probability": 0.9999854862689972, + "eval_compot_inside_bbox": 0.7239583432674408, + "eval_compot_loss": 0.30985718965530396, + "eval_compot_loss_ce": 0.02682037092745304, + "eval_compot_loss_iou": 0.44140625, + "eval_compot_loss_num": 0.052623748779296875, + "eval_compot_loss_xval": 0.263153076171875, + "eval_compot_runtime": 92.2416, + "eval_compot_samples_per_second": 0.542, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 896854840, + "step": 9250 + }, + { + "epoch": 0.9043801329683222, + "loss": 0.2705768346786499, + "loss_ce": 0.02417789213359356, + "loss_iou": 0.451171875, + "loss_num": 0.04931640625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 896854840, + "step": 9250 + }, + { + "epoch": 0.904477903793508, + "grad_norm": 6.574749590954933, + "learning_rate": 5e-05, + "loss": 0.112, + "num_input_tokens_seen": 896951992, + "step": 9251 + }, + { + "epoch": 0.904477903793508, + "loss": 0.1226281076669693, + "loss_ce": 0.005104908719658852, + "loss_iou": 0.2119140625, + "loss_num": 0.0234375, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 896951992, + "step": 9251 + }, + { + "epoch": 0.9045756746186938, + "grad_norm": 1.6775941340740876, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 897048184, + "step": 9252 + }, + { + "epoch": 0.9045756746186938, + "loss": 0.07274886965751648, + "loss_ce": 0.00644562067463994, + "loss_iou": 0.15625, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 897048184, + "step": 9252 + }, + { + "epoch": 0.9046734454438795, + "grad_norm": 8.334710566929951, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 897145864, + "step": 9253 + }, + { + "epoch": 0.9046734454438795, + "loss": 0.07753210514783859, + "loss_ce": 0.004823974799364805, + "loss_iou": 0.2734375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 897145864, + "step": 9253 + }, + { + "epoch": 0.9047712162690653, + "grad_norm": 3.4734802528519038, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 897242404, + "step": 9254 + }, + { + "epoch": 0.9047712162690653, + "loss": 0.049409881234169006, + "loss_ce": 0.005907072219997644, + "loss_iou": 0.36328125, + "loss_num": 0.00872802734375, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 897242404, + "step": 9254 + }, + { + "epoch": 0.9048689870942511, + "grad_norm": 32.27601103426994, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 897339212, + "step": 9255 + }, + { + "epoch": 0.9048689870942511, + "loss": 0.08242996782064438, + "loss_ce": 0.005022131372243166, + "loss_iou": 0.26953125, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 897339212, + "step": 9255 + }, + { + "epoch": 0.9049667579194368, + "grad_norm": 25.44448497014627, + "learning_rate": 5e-05, + "loss": 0.056, + "num_input_tokens_seen": 897437148, + "step": 9256 + }, + { + "epoch": 0.9049667579194368, + "loss": 0.05210255831480026, + "loss_ce": 0.0038771594408899546, + "loss_iou": 0.29296875, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 897437148, + "step": 9256 + }, + { + "epoch": 0.9050645287446226, + "grad_norm": 10.998409544428458, + "learning_rate": 5e-05, + "loss": 0.0968, + "num_input_tokens_seen": 897534324, + "step": 9257 + }, + { + "epoch": 0.9050645287446226, + "loss": 0.058371447026729584, + "loss_ce": 0.00844468642026186, + "loss_iou": 0.244140625, + "loss_num": 0.010009765625, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 897534324, + "step": 9257 + }, + { + "epoch": 0.9051622995698083, + "grad_norm": 8.06783010690133, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 897631008, + "step": 9258 + }, + { + "epoch": 0.9051622995698083, + "loss": 0.08539807051420212, + "loss_ce": 0.007730834651738405, + "loss_iou": 0.24609375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 897631008, + "step": 9258 + }, + { + "epoch": 0.9052600703949941, + "grad_norm": 10.07921758777871, + "learning_rate": 5e-05, + "loss": 0.0537, + "num_input_tokens_seen": 897727544, + "step": 9259 + }, + { + "epoch": 0.9052600703949941, + "loss": 0.0388285294175148, + "loss_ce": 0.0035196938551962376, + "loss_iou": 0.271484375, + "loss_num": 0.007049560546875, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 897727544, + "step": 9259 + }, + { + "epoch": 0.9053578412201799, + "grad_norm": 15.564406986661904, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 897824796, + "step": 9260 + }, + { + "epoch": 0.9053578412201799, + "loss": 0.03428787365555763, + "loss_ce": 0.0031828321516513824, + "loss_iou": 0.34765625, + "loss_num": 0.0062255859375, + "loss_xval": 0.0311279296875, + "num_input_tokens_seen": 897824796, + "step": 9260 + }, + { + "epoch": 0.9054556120453656, + "grad_norm": 8.90925758010181, + "learning_rate": 5e-05, + "loss": 0.0727, + "num_input_tokens_seen": 897921656, + "step": 9261 + }, + { + "epoch": 0.9054556120453656, + "loss": 0.07511691749095917, + "loss_ce": 0.00309542752802372, + "loss_iou": 0.314453125, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 897921656, + "step": 9261 + }, + { + "epoch": 0.9055533828705514, + "grad_norm": 5.644808016729608, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 898018548, + "step": 9262 + }, + { + "epoch": 0.9055533828705514, + "loss": 0.07124514132738113, + "loss_ce": 0.005371043458580971, + "loss_iou": 0.208984375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 898018548, + "step": 9262 + }, + { + "epoch": 0.9056511536957372, + "grad_norm": 2.965852700695088, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 898116220, + "step": 9263 + }, + { + "epoch": 0.9056511536957372, + "loss": 0.050072744488716125, + "loss_ce": 0.0028582357335835695, + "loss_iou": 0.18359375, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 898116220, + "step": 9263 + }, + { + "epoch": 0.9057489245209229, + "grad_norm": 4.305135679462049, + "learning_rate": 5e-05, + "loss": 0.0494, + "num_input_tokens_seen": 898213384, + "step": 9264 + }, + { + "epoch": 0.9057489245209229, + "loss": 0.045578788965940475, + "loss_ce": 0.004364802036434412, + "loss_iou": 0.306640625, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 898213384, + "step": 9264 + }, + { + "epoch": 0.9058466953461087, + "grad_norm": 4.89529677339887, + "learning_rate": 5e-05, + "loss": 0.071, + "num_input_tokens_seen": 898310616, + "step": 9265 + }, + { + "epoch": 0.9058466953461087, + "loss": 0.06886568665504456, + "loss_ce": 0.004427822306752205, + "loss_iou": 0.380859375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 898310616, + "step": 9265 + }, + { + "epoch": 0.9059444661712945, + "grad_norm": 13.903834187757887, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 898406608, + "step": 9266 + }, + { + "epoch": 0.9059444661712945, + "loss": 0.061538420617580414, + "loss_ce": 0.0018422250868752599, + "loss_iou": 0.251953125, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 898406608, + "step": 9266 + }, + { + "epoch": 0.9060422369964802, + "grad_norm": 3.20432012239221, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 898503784, + "step": 9267 + }, + { + "epoch": 0.9060422369964802, + "loss": 0.05850028991699219, + "loss_ce": 0.007352827582508326, + "loss_iou": 0.28515625, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 898503784, + "step": 9267 + }, + { + "epoch": 0.906140007821666, + "grad_norm": 8.100210015812385, + "learning_rate": 5e-05, + "loss": 0.092, + "num_input_tokens_seen": 898600128, + "step": 9268 + }, + { + "epoch": 0.906140007821666, + "loss": 0.1134430393576622, + "loss_ce": 0.0075012678280472755, + "loss_iou": 0.318359375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 898600128, + "step": 9268 + }, + { + "epoch": 0.9062377786468517, + "grad_norm": 13.901068325750291, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 898697516, + "step": 9269 + }, + { + "epoch": 0.9062377786468517, + "loss": 0.0803377628326416, + "loss_ce": 0.0009462826419621706, + "loss_iou": 0.2578125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 898697516, + "step": 9269 + }, + { + "epoch": 0.9063355494720375, + "grad_norm": 9.545473667712576, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 898793408, + "step": 9270 + }, + { + "epoch": 0.9063355494720375, + "loss": 0.10045984387397766, + "loss_ce": 0.010509278625249863, + "loss_iou": 0.2275390625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 898793408, + "step": 9270 + }, + { + "epoch": 0.9064333202972233, + "grad_norm": 5.223582351012415, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 898889816, + "step": 9271 + }, + { + "epoch": 0.9064333202972233, + "loss": 0.07735796272754669, + "loss_ce": 0.008655264042317867, + "loss_iou": 0.2021484375, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 898889816, + "step": 9271 + }, + { + "epoch": 0.906531091122409, + "grad_norm": 6.356463844975381, + "learning_rate": 5e-05, + "loss": 0.0677, + "num_input_tokens_seen": 898986100, + "step": 9272 + }, + { + "epoch": 0.906531091122409, + "loss": 0.06012963503599167, + "loss_ce": 0.004007810726761818, + "loss_iou": 0.220703125, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 898986100, + "step": 9272 + }, + { + "epoch": 0.9066288619475948, + "grad_norm": 6.437567281645293, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 899082844, + "step": 9273 + }, + { + "epoch": 0.9066288619475948, + "loss": 0.06420935690402985, + "loss_ce": 0.006386172026395798, + "loss_iou": 0.265625, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 899082844, + "step": 9273 + }, + { + "epoch": 0.9067266327727806, + "grad_norm": 12.370773277496829, + "learning_rate": 5e-05, + "loss": 0.0935, + "num_input_tokens_seen": 899180028, + "step": 9274 + }, + { + "epoch": 0.9067266327727806, + "loss": 0.06817296147346497, + "loss_ce": 0.005142719950526953, + "loss_iou": 0.283203125, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 899180028, + "step": 9274 + }, + { + "epoch": 0.9068244035979663, + "grad_norm": 4.222244226561537, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 899277204, + "step": 9275 + }, + { + "epoch": 0.9068244035979663, + "loss": 0.05987143889069557, + "loss_ce": 0.004222633317112923, + "loss_iou": 0.345703125, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 899277204, + "step": 9275 + }, + { + "epoch": 0.9069221744231521, + "grad_norm": 5.114250398403313, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 899374400, + "step": 9276 + }, + { + "epoch": 0.9069221744231521, + "loss": 0.06843218952417374, + "loss_ce": 0.0048945872113108635, + "loss_iou": 0.3359375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 899374400, + "step": 9276 + }, + { + "epoch": 0.9070199452483378, + "grad_norm": 3.7702595029068053, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 899470988, + "step": 9277 + }, + { + "epoch": 0.9070199452483378, + "loss": 0.06590321660041809, + "loss_ce": 0.02075246162712574, + "loss_iou": 0.26171875, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 899470988, + "step": 9277 + }, + { + "epoch": 0.9071177160735236, + "grad_norm": 4.398882141261218, + "learning_rate": 5e-05, + "loss": 0.0438, + "num_input_tokens_seen": 899567792, + "step": 9278 + }, + { + "epoch": 0.9071177160735236, + "loss": 0.032819125801324844, + "loss_ce": 0.004742953926324844, + "loss_iou": 0.291015625, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 899567792, + "step": 9278 + }, + { + "epoch": 0.9072154868987095, + "grad_norm": 4.32504747690967, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 899665112, + "step": 9279 + }, + { + "epoch": 0.9072154868987095, + "loss": 0.06351390480995178, + "loss_ce": 0.0037757502868771553, + "loss_iou": 0.30859375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 899665112, + "step": 9279 + }, + { + "epoch": 0.9073132577238952, + "grad_norm": 10.589580234921943, + "learning_rate": 5e-05, + "loss": 0.0534, + "num_input_tokens_seen": 899762012, + "step": 9280 + }, + { + "epoch": 0.9073132577238952, + "loss": 0.0565471425652504, + "loss_ce": 0.004789326805621386, + "loss_iou": 0.2353515625, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 899762012, + "step": 9280 + }, + { + "epoch": 0.907411028549081, + "grad_norm": 27.141825503517715, + "learning_rate": 5e-05, + "loss": 0.0763, + "num_input_tokens_seen": 899858680, + "step": 9281 + }, + { + "epoch": 0.907411028549081, + "loss": 0.07272179424762726, + "loss_ce": 0.00584252318367362, + "loss_iou": 0.29296875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 899858680, + "step": 9281 + }, + { + "epoch": 0.9075087993742668, + "grad_norm": 10.620219871469521, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 899955640, + "step": 9282 + }, + { + "epoch": 0.9075087993742668, + "loss": 0.0734834149479866, + "loss_ce": 0.010594312101602554, + "loss_iou": 0.306640625, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 899955640, + "step": 9282 + }, + { + "epoch": 0.9076065701994525, + "grad_norm": 7.744996542649201, + "learning_rate": 5e-05, + "loss": 0.0973, + "num_input_tokens_seen": 900052264, + "step": 9283 + }, + { + "epoch": 0.9076065701994525, + "loss": 0.11358974874019623, + "loss_ce": 0.0035891463048756123, + "loss_iou": 0.2099609375, + "loss_num": 0.0220947265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 900052264, + "step": 9283 + }, + { + "epoch": 0.9077043410246383, + "grad_norm": 5.217658249067613, + "learning_rate": 5e-05, + "loss": 0.077, + "num_input_tokens_seen": 900149116, + "step": 9284 + }, + { + "epoch": 0.9077043410246383, + "loss": 0.07894475013017654, + "loss_ce": 0.004924364387989044, + "loss_iou": 0.294921875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 900149116, + "step": 9284 + }, + { + "epoch": 0.9078021118498241, + "grad_norm": 3.0472959965056368, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 900246216, + "step": 9285 + }, + { + "epoch": 0.9078021118498241, + "loss": 0.057650867849588394, + "loss_ce": 0.002536123152822256, + "loss_iou": 0.306640625, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 900246216, + "step": 9285 + }, + { + "epoch": 0.9078998826750098, + "grad_norm": 3.2716781302760385, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 900343040, + "step": 9286 + }, + { + "epoch": 0.9078998826750098, + "loss": 0.06320106983184814, + "loss_ce": 0.003836751217022538, + "loss_iou": 0.22265625, + "loss_num": 0.01190185546875, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 900343040, + "step": 9286 + }, + { + "epoch": 0.9079976535001956, + "grad_norm": 11.891434884786499, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 900439036, + "step": 9287 + }, + { + "epoch": 0.9079976535001956, + "loss": 0.0759408175945282, + "loss_ce": 0.0038811825215816498, + "loss_iou": 0.2412109375, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 900439036, + "step": 9287 + }, + { + "epoch": 0.9080954243253813, + "grad_norm": 6.065000877117139, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 900535840, + "step": 9288 + }, + { + "epoch": 0.9080954243253813, + "loss": 0.08759383857250214, + "loss_ce": 0.0031211895402520895, + "loss_iou": 0.228515625, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 900535840, + "step": 9288 + }, + { + "epoch": 0.9081931951505671, + "grad_norm": 13.751936962264656, + "learning_rate": 5e-05, + "loss": 0.0922, + "num_input_tokens_seen": 900633572, + "step": 9289 + }, + { + "epoch": 0.9081931951505671, + "loss": 0.09053751826286316, + "loss_ce": 0.004935707896947861, + "loss_iou": 0.390625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 900633572, + "step": 9289 + }, + { + "epoch": 0.9082909659757529, + "grad_norm": 5.7340892301359245, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 900730440, + "step": 9290 + }, + { + "epoch": 0.9082909659757529, + "loss": 0.03829128295183182, + "loss_ce": 0.003611869178712368, + "loss_iou": 0.34765625, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 900730440, + "step": 9290 + }, + { + "epoch": 0.9083887368009386, + "grad_norm": 7.1809962420129585, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 900827188, + "step": 9291 + }, + { + "epoch": 0.9083887368009386, + "loss": 0.03789067640900612, + "loss_ce": 0.0035049966536462307, + "loss_iou": 0.2734375, + "loss_num": 0.006866455078125, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 900827188, + "step": 9291 + }, + { + "epoch": 0.9084865076261244, + "grad_norm": 5.6925360581883355, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 900923512, + "step": 9292 + }, + { + "epoch": 0.9084865076261244, + "loss": 0.04416878893971443, + "loss_ce": 0.0026820495259016752, + "loss_iou": 0.2021484375, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 900923512, + "step": 9292 + }, + { + "epoch": 0.9085842784513102, + "grad_norm": 16.136638068281467, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 901020920, + "step": 9293 + }, + { + "epoch": 0.9085842784513102, + "loss": 0.08264917880296707, + "loss_ce": 0.007774302735924721, + "loss_iou": 0.25390625, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 901020920, + "step": 9293 + }, + { + "epoch": 0.9086820492764959, + "grad_norm": 24.09109364989661, + "learning_rate": 5e-05, + "loss": 0.0933, + "num_input_tokens_seen": 901117072, + "step": 9294 + }, + { + "epoch": 0.9086820492764959, + "loss": 0.08313979208469391, + "loss_ce": 0.004934682510793209, + "loss_iou": 0.267578125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 901117072, + "step": 9294 + }, + { + "epoch": 0.9087798201016817, + "grad_norm": 4.752505668839765, + "learning_rate": 5e-05, + "loss": 0.0806, + "num_input_tokens_seen": 901214892, + "step": 9295 + }, + { + "epoch": 0.9087798201016817, + "loss": 0.09815753996372223, + "loss_ce": 0.005811345763504505, + "loss_iou": 0.298828125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 901214892, + "step": 9295 + }, + { + "epoch": 0.9088775909268674, + "grad_norm": 3.3002938658920034, + "learning_rate": 5e-05, + "loss": 0.0375, + "num_input_tokens_seen": 901310520, + "step": 9296 + }, + { + "epoch": 0.9088775909268674, + "loss": 0.048616085201501846, + "loss_ce": 0.00468602916225791, + "loss_iou": 0.294921875, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 901310520, + "step": 9296 + }, + { + "epoch": 0.9089753617520532, + "grad_norm": 6.111627032626551, + "learning_rate": 5e-05, + "loss": 0.0597, + "num_input_tokens_seen": 901407248, + "step": 9297 + }, + { + "epoch": 0.9089753617520532, + "loss": 0.054035186767578125, + "loss_ce": 0.0039939871057868, + "loss_iou": 0.21484375, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 901407248, + "step": 9297 + }, + { + "epoch": 0.909073132577239, + "grad_norm": 4.668348695089845, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 901504344, + "step": 9298 + }, + { + "epoch": 0.909073132577239, + "loss": 0.06673815846443176, + "loss_ce": 0.0034675977658480406, + "loss_iou": 0.20703125, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 901504344, + "step": 9298 + }, + { + "epoch": 0.9091709034024247, + "grad_norm": 6.166689714105456, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 901601292, + "step": 9299 + }, + { + "epoch": 0.9091709034024247, + "loss": 0.08811676502227783, + "loss_ce": 0.00919831357896328, + "loss_iou": 0.23828125, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 901601292, + "step": 9299 + }, + { + "epoch": 0.9092686742276105, + "grad_norm": 10.60221062184404, + "learning_rate": 5e-05, + "loss": 0.0929, + "num_input_tokens_seen": 901698176, + "step": 9300 + }, + { + "epoch": 0.9092686742276105, + "loss": 0.08733600378036499, + "loss_ce": 0.006922181695699692, + "loss_iou": 0.306640625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 901698176, + "step": 9300 + }, + { + "epoch": 0.9093664450527963, + "grad_norm": 5.699711271339359, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 901795532, + "step": 9301 + }, + { + "epoch": 0.9093664450527963, + "loss": 0.06453515589237213, + "loss_ce": 0.004346864297986031, + "loss_iou": 0.31640625, + "loss_num": 0.01202392578125, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 901795532, + "step": 9301 + }, + { + "epoch": 0.909464215877982, + "grad_norm": 2.8395241308810784, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 901892912, + "step": 9302 + }, + { + "epoch": 0.909464215877982, + "loss": 0.06037959083914757, + "loss_ce": 0.005470839329063892, + "loss_iou": 0.216796875, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 901892912, + "step": 9302 + }, + { + "epoch": 0.9095619867031678, + "grad_norm": 2.777622307834887, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 901990484, + "step": 9303 + }, + { + "epoch": 0.9095619867031678, + "loss": 0.09031158685684204, + "loss_ce": 0.0035195965319871902, + "loss_iou": 0.330078125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 901990484, + "step": 9303 + }, + { + "epoch": 0.9096597575283535, + "grad_norm": 6.163290634416148, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 902085060, + "step": 9304 + }, + { + "epoch": 0.9096597575283535, + "loss": 0.07140471041202545, + "loss_ce": 0.005715623497962952, + "loss_iou": 0.328125, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 902085060, + "step": 9304 + }, + { + "epoch": 0.9097575283535393, + "grad_norm": 4.165299637812167, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 902181756, + "step": 9305 + }, + { + "epoch": 0.9097575283535393, + "loss": 0.04078170657157898, + "loss_ce": 0.00724288634955883, + "loss_iou": 0.1748046875, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 902181756, + "step": 9305 + }, + { + "epoch": 0.9098552991787251, + "grad_norm": 2.9719461873006923, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 902279016, + "step": 9306 + }, + { + "epoch": 0.9098552991787251, + "loss": 0.050069041550159454, + "loss_ce": 0.008214183151721954, + "loss_iou": 0.29296875, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 902279016, + "step": 9306 + }, + { + "epoch": 0.9099530700039108, + "grad_norm": 3.4595676023943827, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 902375808, + "step": 9307 + }, + { + "epoch": 0.9099530700039108, + "loss": 0.08458300679922104, + "loss_ce": 0.00844164751470089, + "loss_iou": 0.3125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 902375808, + "step": 9307 + }, + { + "epoch": 0.9100508408290966, + "grad_norm": 14.295087775407817, + "learning_rate": 5e-05, + "loss": 0.0528, + "num_input_tokens_seen": 902472308, + "step": 9308 + }, + { + "epoch": 0.9100508408290966, + "loss": 0.050806161016225815, + "loss_ce": 0.003717538435012102, + "loss_iou": 0.365234375, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 902472308, + "step": 9308 + }, + { + "epoch": 0.9101486116542824, + "grad_norm": 8.133243600214815, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 902568636, + "step": 9309 + }, + { + "epoch": 0.9101486116542824, + "loss": 0.06121750921010971, + "loss_ce": 0.0057975854724645615, + "loss_iou": 0.35546875, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 902568636, + "step": 9309 + }, + { + "epoch": 0.9102463824794681, + "grad_norm": 6.590801963526822, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 902665144, + "step": 9310 + }, + { + "epoch": 0.9102463824794681, + "loss": 0.0906866192817688, + "loss_ce": 0.0034292303025722504, + "loss_iou": 0.423828125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 902665144, + "step": 9310 + }, + { + "epoch": 0.9103441533046539, + "grad_norm": 2.6637131425309053, + "learning_rate": 5e-05, + "loss": 0.0434, + "num_input_tokens_seen": 902761020, + "step": 9311 + }, + { + "epoch": 0.9103441533046539, + "loss": 0.04195909947156906, + "loss_ce": 0.004256540443748236, + "loss_iou": 0.189453125, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 902761020, + "step": 9311 + }, + { + "epoch": 0.9104419241298397, + "grad_norm": 7.376828654127736, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 902857436, + "step": 9312 + }, + { + "epoch": 0.9104419241298397, + "loss": 0.09195443987846375, + "loss_ce": 0.007270071655511856, + "loss_iou": 0.2421875, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 902857436, + "step": 9312 + }, + { + "epoch": 0.9105396949550254, + "grad_norm": 9.09335191559866, + "learning_rate": 5e-05, + "loss": 0.0497, + "num_input_tokens_seen": 902954344, + "step": 9313 + }, + { + "epoch": 0.9105396949550254, + "loss": 0.045482732355594635, + "loss_ce": 0.002796267857775092, + "loss_iou": 0.20703125, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 902954344, + "step": 9313 + }, + { + "epoch": 0.9106374657802112, + "grad_norm": 5.2499260453137975, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 903050788, + "step": 9314 + }, + { + "epoch": 0.9106374657802112, + "loss": 0.1131199449300766, + "loss_ce": 0.005667550954967737, + "loss_iou": 0.1884765625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 903050788, + "step": 9314 + }, + { + "epoch": 0.9107352366053969, + "grad_norm": 11.750735784337143, + "learning_rate": 5e-05, + "loss": 0.0708, + "num_input_tokens_seen": 903147328, + "step": 9315 + }, + { + "epoch": 0.9107352366053969, + "loss": 0.06881946325302124, + "loss_ce": 0.006040991749614477, + "loss_iou": 0.2373046875, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 903147328, + "step": 9315 + }, + { + "epoch": 0.9108330074305827, + "grad_norm": 11.340889390954919, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 903243516, + "step": 9316 + }, + { + "epoch": 0.9108330074305827, + "loss": 0.029119156301021576, + "loss_ce": 0.003377579152584076, + "loss_iou": 0.16796875, + "loss_num": 0.005157470703125, + "loss_xval": 0.0257568359375, + "num_input_tokens_seen": 903243516, + "step": 9316 + }, + { + "epoch": 0.9109307782557685, + "grad_norm": 6.219236803001953, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 903340460, + "step": 9317 + }, + { + "epoch": 0.9109307782557685, + "loss": 0.04195689409971237, + "loss_ce": 0.0032377163879573345, + "loss_iou": 0.2470703125, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 903340460, + "step": 9317 + }, + { + "epoch": 0.9110285490809542, + "grad_norm": 5.878159402712767, + "learning_rate": 5e-05, + "loss": 0.0646, + "num_input_tokens_seen": 903438180, + "step": 9318 + }, + { + "epoch": 0.9110285490809542, + "loss": 0.08027037978172302, + "loss_ce": 0.0029693502001464367, + "loss_iou": 0.3359375, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 903438180, + "step": 9318 + }, + { + "epoch": 0.91112631990614, + "grad_norm": 9.423639701132393, + "learning_rate": 5e-05, + "loss": 0.0875, + "num_input_tokens_seen": 903535340, + "step": 9319 + }, + { + "epoch": 0.91112631990614, + "loss": 0.07271384447813034, + "loss_ce": 0.006177895236760378, + "loss_iou": 0.27734375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 903535340, + "step": 9319 + }, + { + "epoch": 0.9112240907313258, + "grad_norm": 10.137715848208158, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 903631972, + "step": 9320 + }, + { + "epoch": 0.9112240907313258, + "loss": 0.07264158129692078, + "loss_ce": 0.005487651564180851, + "loss_iou": 0.2216796875, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 903631972, + "step": 9320 + }, + { + "epoch": 0.9113218615565115, + "grad_norm": 2.8114258273188715, + "learning_rate": 5e-05, + "loss": 0.0493, + "num_input_tokens_seen": 903729108, + "step": 9321 + }, + { + "epoch": 0.9113218615565115, + "loss": 0.05066289007663727, + "loss_ce": 0.002258196473121643, + "loss_iou": 0.259765625, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 903729108, + "step": 9321 + }, + { + "epoch": 0.9114196323816973, + "grad_norm": 5.094356721603333, + "learning_rate": 5e-05, + "loss": 0.0575, + "num_input_tokens_seen": 903826024, + "step": 9322 + }, + { + "epoch": 0.9114196323816973, + "loss": 0.05620002746582031, + "loss_ce": 0.0048999725840985775, + "loss_iou": 0.296875, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 903826024, + "step": 9322 + }, + { + "epoch": 0.911517403206883, + "grad_norm": 11.428412389449809, + "learning_rate": 5e-05, + "loss": 0.1011, + "num_input_tokens_seen": 903923836, + "step": 9323 + }, + { + "epoch": 0.911517403206883, + "loss": 0.1294487714767456, + "loss_ce": 0.002373573835939169, + "loss_iou": 0.349609375, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 903923836, + "step": 9323 + }, + { + "epoch": 0.9116151740320688, + "grad_norm": 12.434108249353871, + "learning_rate": 5e-05, + "loss": 0.0701, + "num_input_tokens_seen": 904021464, + "step": 9324 + }, + { + "epoch": 0.9116151740320688, + "loss": 0.05890587717294693, + "loss_ce": 0.004843990318477154, + "loss_iou": 0.384765625, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 904021464, + "step": 9324 + }, + { + "epoch": 0.9117129448572546, + "grad_norm": 24.774567161923866, + "learning_rate": 5e-05, + "loss": 0.0771, + "num_input_tokens_seen": 904118536, + "step": 9325 + }, + { + "epoch": 0.9117129448572546, + "loss": 0.0665905773639679, + "loss_ce": 0.0011761474888771772, + "loss_iou": 0.25390625, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 904118536, + "step": 9325 + }, + { + "epoch": 0.9118107156824403, + "grad_norm": 10.821964330244935, + "learning_rate": 5e-05, + "loss": 0.1135, + "num_input_tokens_seen": 904214996, + "step": 9326 + }, + { + "epoch": 0.9118107156824403, + "loss": 0.12848061323165894, + "loss_ce": 0.009446816518902779, + "loss_iou": 0.328125, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 904214996, + "step": 9326 + }, + { + "epoch": 0.9119084865076261, + "grad_norm": 15.153259997472842, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 904312236, + "step": 9327 + }, + { + "epoch": 0.9119084865076261, + "loss": 0.0790170207619667, + "loss_ce": 0.005622243508696556, + "loss_iou": 0.306640625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 904312236, + "step": 9327 + }, + { + "epoch": 0.9120062573328119, + "grad_norm": 16.09909067598407, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 904409436, + "step": 9328 + }, + { + "epoch": 0.9120062573328119, + "loss": 0.07278583198785782, + "loss_ce": 0.004991034045815468, + "loss_iou": 0.287109375, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 904409436, + "step": 9328 + }, + { + "epoch": 0.9121040281579976, + "grad_norm": 2.668699070252194, + "learning_rate": 5e-05, + "loss": 0.1016, + "num_input_tokens_seen": 904506188, + "step": 9329 + }, + { + "epoch": 0.9121040281579976, + "loss": 0.08275353908538818, + "loss_ce": 0.006879208609461784, + "loss_iou": 0.306640625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 904506188, + "step": 9329 + }, + { + "epoch": 0.9122017989831834, + "grad_norm": 10.415716226004415, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 904603196, + "step": 9330 + }, + { + "epoch": 0.9122017989831834, + "loss": 0.1003594845533371, + "loss_ce": 0.005388789344578981, + "loss_iou": 0.2734375, + "loss_num": 0.01904296875, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 904603196, + "step": 9330 + }, + { + "epoch": 0.9122995698083692, + "grad_norm": 20.41517704644487, + "learning_rate": 5e-05, + "loss": 0.1035, + "num_input_tokens_seen": 904700700, + "step": 9331 + }, + { + "epoch": 0.9122995698083692, + "loss": 0.13239705562591553, + "loss_ce": 0.008816119283437729, + "loss_iou": 0.373046875, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 904700700, + "step": 9331 + }, + { + "epoch": 0.9123973406335549, + "grad_norm": 7.920074213977285, + "learning_rate": 5e-05, + "loss": 0.0689, + "num_input_tokens_seen": 904797632, + "step": 9332 + }, + { + "epoch": 0.9123973406335549, + "loss": 0.07149685174226761, + "loss_ce": 0.009370691142976284, + "loss_iou": 0.333984375, + "loss_num": 0.012451171875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 904797632, + "step": 9332 + }, + { + "epoch": 0.9124951114587407, + "grad_norm": 2.908118226898374, + "learning_rate": 5e-05, + "loss": 0.0557, + "num_input_tokens_seen": 904894660, + "step": 9333 + }, + { + "epoch": 0.9124951114587407, + "loss": 0.05929568409919739, + "loss_ce": 0.005416897591203451, + "loss_iou": 0.255859375, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 904894660, + "step": 9333 + }, + { + "epoch": 0.9125928822839264, + "grad_norm": 2.9819729751995343, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 904990544, + "step": 9334 + }, + { + "epoch": 0.9125928822839264, + "loss": 0.048457395285367966, + "loss_ce": 0.0026428799610584974, + "loss_iou": 0.21484375, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 904990544, + "step": 9334 + }, + { + "epoch": 0.9126906531091122, + "grad_norm": 2.944799026024482, + "learning_rate": 5e-05, + "loss": 0.1213, + "num_input_tokens_seen": 905087056, + "step": 9335 + }, + { + "epoch": 0.9126906531091122, + "loss": 0.09460315108299255, + "loss_ce": 0.00658282358199358, + "loss_iou": 0.244140625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 905087056, + "step": 9335 + }, + { + "epoch": 0.912788423934298, + "grad_norm": 14.020254481315225, + "learning_rate": 5e-05, + "loss": 0.0643, + "num_input_tokens_seen": 905183328, + "step": 9336 + }, + { + "epoch": 0.912788423934298, + "loss": 0.05846167728304863, + "loss_ce": 0.002980720717459917, + "loss_iou": 0.2490234375, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 905183328, + "step": 9336 + }, + { + "epoch": 0.9128861947594837, + "grad_norm": 11.732674061447911, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 905279924, + "step": 9337 + }, + { + "epoch": 0.9128861947594837, + "loss": 0.07786712050437927, + "loss_ce": 0.004708860069513321, + "loss_iou": 0.2578125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 905279924, + "step": 9337 + }, + { + "epoch": 0.9129839655846695, + "grad_norm": 12.165750443575645, + "learning_rate": 5e-05, + "loss": 0.108, + "num_input_tokens_seen": 905377156, + "step": 9338 + }, + { + "epoch": 0.9129839655846695, + "loss": 0.11475811898708344, + "loss_ce": 0.006802183575928211, + "loss_iou": 0.306640625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 905377156, + "step": 9338 + }, + { + "epoch": 0.9130817364098553, + "grad_norm": 18.672000233706285, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 905474388, + "step": 9339 + }, + { + "epoch": 0.9130817364098553, + "loss": 0.09244972467422485, + "loss_ce": 0.003750389674678445, + "loss_iou": 0.283203125, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 905474388, + "step": 9339 + }, + { + "epoch": 0.913179507235041, + "grad_norm": 10.047781264934226, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 905572508, + "step": 9340 + }, + { + "epoch": 0.913179507235041, + "loss": 0.07433083653450012, + "loss_ce": 0.0024161627516150475, + "loss_iou": 0.365234375, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 905572508, + "step": 9340 + }, + { + "epoch": 0.9132772780602268, + "grad_norm": 4.759743165013883, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 905669656, + "step": 9341 + }, + { + "epoch": 0.9132772780602268, + "loss": 0.0856090560555458, + "loss_ce": 0.006221395451575518, + "loss_iou": 0.27734375, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 905669656, + "step": 9341 + }, + { + "epoch": 0.9133750488854125, + "grad_norm": 24.216864481232637, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 905766576, + "step": 9342 + }, + { + "epoch": 0.9133750488854125, + "loss": 0.08336630463600159, + "loss_ce": 0.003898537252098322, + "loss_iou": 0.2109375, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 905766576, + "step": 9342 + }, + { + "epoch": 0.9134728197105983, + "grad_norm": 34.50831653989226, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 905862908, + "step": 9343 + }, + { + "epoch": 0.9134728197105983, + "loss": 0.06153168901801109, + "loss_ce": 0.005760815925896168, + "loss_iou": 0.302734375, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 905862908, + "step": 9343 + }, + { + "epoch": 0.9135705905357842, + "grad_norm": 139.45582156836974, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 905959956, + "step": 9344 + }, + { + "epoch": 0.9135705905357842, + "loss": 0.06654052436351776, + "loss_ce": 0.006222530268132687, + "loss_iou": 0.2353515625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 905959956, + "step": 9344 + }, + { + "epoch": 0.9136683613609698, + "grad_norm": 16.886921906095026, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 906056816, + "step": 9345 + }, + { + "epoch": 0.9136683613609698, + "loss": 0.08517725020647049, + "loss_ce": 0.0032833334989845753, + "loss_iou": 0.23828125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 906056816, + "step": 9345 + }, + { + "epoch": 0.9137661321861557, + "grad_norm": 8.084012197438224, + "learning_rate": 5e-05, + "loss": 0.0572, + "num_input_tokens_seen": 906153608, + "step": 9346 + }, + { + "epoch": 0.9137661321861557, + "loss": 0.05322990566492081, + "loss_ce": 0.006019215099513531, + "loss_iou": 0.26953125, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 906153608, + "step": 9346 + }, + { + "epoch": 0.9138639030113415, + "grad_norm": 7.06134217102828, + "learning_rate": 5e-05, + "loss": 0.1044, + "num_input_tokens_seen": 906250384, + "step": 9347 + }, + { + "epoch": 0.9138639030113415, + "loss": 0.08283723890781403, + "loss_ce": 0.0056582847610116005, + "loss_iou": 0.318359375, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 906250384, + "step": 9347 + }, + { + "epoch": 0.9139616738365272, + "grad_norm": 5.918056879869729, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 906347552, + "step": 9348 + }, + { + "epoch": 0.9139616738365272, + "loss": 0.08692954480648041, + "loss_ce": 0.004547344520688057, + "loss_iou": 0.248046875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 906347552, + "step": 9348 + }, + { + "epoch": 0.914059444661713, + "grad_norm": 15.623461197109288, + "learning_rate": 5e-05, + "loss": 0.0537, + "num_input_tokens_seen": 906444160, + "step": 9349 + }, + { + "epoch": 0.914059444661713, + "loss": 0.03086722642183304, + "loss_ce": 0.0010057741310447454, + "loss_iou": 0.26171875, + "loss_num": 0.0059814453125, + "loss_xval": 0.0299072265625, + "num_input_tokens_seen": 906444160, + "step": 9349 + }, + { + "epoch": 0.9141572154868987, + "grad_norm": 6.34327450512083, + "learning_rate": 5e-05, + "loss": 0.1017, + "num_input_tokens_seen": 906541088, + "step": 9350 + }, + { + "epoch": 0.9141572154868987, + "loss": 0.09824399650096893, + "loss_ce": 0.007927224040031433, + "loss_iou": 0.369140625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 906541088, + "step": 9350 + }, + { + "epoch": 0.9142549863120845, + "grad_norm": 9.745319331343502, + "learning_rate": 5e-05, + "loss": 0.0543, + "num_input_tokens_seen": 906638040, + "step": 9351 + }, + { + "epoch": 0.9142549863120845, + "loss": 0.04069680720567703, + "loss_ce": 0.004045194946229458, + "loss_iou": 0.3125, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 906638040, + "step": 9351 + }, + { + "epoch": 0.9143527571372703, + "grad_norm": 15.751026570149193, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 906734720, + "step": 9352 + }, + { + "epoch": 0.9143527571372703, + "loss": 0.10859414935112, + "loss_ce": 0.0044376542791724205, + "loss_iou": 0.2890625, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 906734720, + "step": 9352 + }, + { + "epoch": 0.914450527962456, + "grad_norm": 6.9419394014669065, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 906831768, + "step": 9353 + }, + { + "epoch": 0.914450527962456, + "loss": 0.060943495482206345, + "loss_ce": 0.004432569723576307, + "loss_iou": 0.349609375, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 906831768, + "step": 9353 + }, + { + "epoch": 0.9145482987876418, + "grad_norm": 4.033261781167955, + "learning_rate": 5e-05, + "loss": 0.0834, + "num_input_tokens_seen": 906928660, + "step": 9354 + }, + { + "epoch": 0.9145482987876418, + "loss": 0.09439273923635483, + "loss_ce": 0.007692296057939529, + "loss_iou": 0.3828125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 906928660, + "step": 9354 + }, + { + "epoch": 0.9146460696128276, + "grad_norm": 11.188465669232691, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 907025496, + "step": 9355 + }, + { + "epoch": 0.9146460696128276, + "loss": 0.09502436220645905, + "loss_ce": 0.007255802862346172, + "loss_iou": 0.31640625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 907025496, + "step": 9355 + }, + { + "epoch": 0.9147438404380133, + "grad_norm": 7.00945751452803, + "learning_rate": 5e-05, + "loss": 0.0964, + "num_input_tokens_seen": 907121880, + "step": 9356 + }, + { + "epoch": 0.9147438404380133, + "loss": 0.12314566224813461, + "loss_ce": 0.005408850964158773, + "loss_iou": 0.2333984375, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 907121880, + "step": 9356 + }, + { + "epoch": 0.9148416112631991, + "grad_norm": 2.634465106501761, + "learning_rate": 5e-05, + "loss": 0.0601, + "num_input_tokens_seen": 907218928, + "step": 9357 + }, + { + "epoch": 0.9148416112631991, + "loss": 0.041518669575452805, + "loss_ce": 0.00826595351099968, + "loss_iou": 0.3046875, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 907218928, + "step": 9357 + }, + { + "epoch": 0.9149393820883849, + "grad_norm": 2.259745421404669, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 907315828, + "step": 9358 + }, + { + "epoch": 0.9149393820883849, + "loss": 0.09370074421167374, + "loss_ce": 0.004116395488381386, + "loss_iou": 0.2177734375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 907315828, + "step": 9358 + }, + { + "epoch": 0.9150371529135706, + "grad_norm": 10.296611298150447, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 907412336, + "step": 9359 + }, + { + "epoch": 0.9150371529135706, + "loss": 0.06723743677139282, + "loss_ce": 0.006980478297919035, + "loss_iou": 0.29296875, + "loss_num": 0.01202392578125, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 907412336, + "step": 9359 + }, + { + "epoch": 0.9151349237387564, + "grad_norm": 15.40996402554414, + "learning_rate": 5e-05, + "loss": 0.0539, + "num_input_tokens_seen": 907509332, + "step": 9360 + }, + { + "epoch": 0.9151349237387564, + "loss": 0.060292523354291916, + "loss_ce": 0.007382671348750591, + "loss_iou": 0.271484375, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 907509332, + "step": 9360 + }, + { + "epoch": 0.9152326945639421, + "grad_norm": 2.5867654049315973, + "learning_rate": 5e-05, + "loss": 0.0933, + "num_input_tokens_seen": 907605188, + "step": 9361 + }, + { + "epoch": 0.9152326945639421, + "loss": 0.09202069044113159, + "loss_ce": 0.009714778512716293, + "loss_iou": 0.28515625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 907605188, + "step": 9361 + }, + { + "epoch": 0.9153304653891279, + "grad_norm": 6.007695513708377, + "learning_rate": 5e-05, + "loss": 0.0705, + "num_input_tokens_seen": 907701864, + "step": 9362 + }, + { + "epoch": 0.9153304653891279, + "loss": 0.05559881404042244, + "loss_ce": 0.002696592127904296, + "loss_iou": 0.2451171875, + "loss_num": 0.01055908203125, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 907701864, + "step": 9362 + }, + { + "epoch": 0.9154282362143137, + "grad_norm": 9.021825896876933, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 907800092, + "step": 9363 + }, + { + "epoch": 0.9154282362143137, + "loss": 0.08803244680166245, + "loss_ce": 0.0062300777062773705, + "loss_iou": 0.3671875, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 907800092, + "step": 9363 + }, + { + "epoch": 0.9155260070394994, + "grad_norm": 10.636866019294507, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 907897020, + "step": 9364 + }, + { + "epoch": 0.9155260070394994, + "loss": 0.09311521053314209, + "loss_ce": 0.00662839412689209, + "loss_iou": 0.283203125, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 907897020, + "step": 9364 + }, + { + "epoch": 0.9156237778646852, + "grad_norm": 18.32074435682919, + "learning_rate": 5e-05, + "loss": 0.0979, + "num_input_tokens_seen": 907994052, + "step": 9365 + }, + { + "epoch": 0.9156237778646852, + "loss": 0.10806552320718765, + "loss_ce": 0.005984224379062653, + "loss_iou": 0.333984375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 907994052, + "step": 9365 + }, + { + "epoch": 0.915721548689871, + "grad_norm": 11.124742126963701, + "learning_rate": 5e-05, + "loss": 0.0591, + "num_input_tokens_seen": 908090592, + "step": 9366 + }, + { + "epoch": 0.915721548689871, + "loss": 0.04471852257847786, + "loss_ce": 0.0040233321487903595, + "loss_iou": 0.333984375, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 908090592, + "step": 9366 + }, + { + "epoch": 0.9158193195150567, + "grad_norm": 5.900925424450876, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 908187220, + "step": 9367 + }, + { + "epoch": 0.9158193195150567, + "loss": 0.10095130652189255, + "loss_ce": 0.007231822703033686, + "loss_iou": 0.267578125, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 908187220, + "step": 9367 + }, + { + "epoch": 0.9159170903402425, + "grad_norm": 12.04877198758625, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 908284352, + "step": 9368 + }, + { + "epoch": 0.9159170903402425, + "loss": 0.05032537877559662, + "loss_ce": 0.005174625664949417, + "loss_iou": 0.29296875, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 908284352, + "step": 9368 + }, + { + "epoch": 0.9160148611654282, + "grad_norm": 7.837218898448114, + "learning_rate": 5e-05, + "loss": 0.0775, + "num_input_tokens_seen": 908382656, + "step": 9369 + }, + { + "epoch": 0.9160148611654282, + "loss": 0.05954534560441971, + "loss_ce": 0.005483455490320921, + "loss_iou": 0.341796875, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 908382656, + "step": 9369 + }, + { + "epoch": 0.916112631990614, + "grad_norm": 14.011749881261261, + "learning_rate": 5e-05, + "loss": 0.0553, + "num_input_tokens_seen": 908478960, + "step": 9370 + }, + { + "epoch": 0.916112631990614, + "loss": 0.06109779328107834, + "loss_ce": 0.004228288307785988, + "loss_iou": 0.20703125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 908478960, + "step": 9370 + }, + { + "epoch": 0.9162104028157998, + "grad_norm": 5.095788724124793, + "learning_rate": 5e-05, + "loss": 0.0553, + "num_input_tokens_seen": 908576164, + "step": 9371 + }, + { + "epoch": 0.9162104028157998, + "loss": 0.059798117727041245, + "loss_ce": 0.003966208081692457, + "loss_iou": 0.359375, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 908576164, + "step": 9371 + }, + { + "epoch": 0.9163081736409855, + "grad_norm": 20.486136619411695, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 908673216, + "step": 9372 + }, + { + "epoch": 0.9163081736409855, + "loss": 0.06972363591194153, + "loss_ce": 0.002981693483889103, + "loss_iou": 0.3046875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 908673216, + "step": 9372 + }, + { + "epoch": 0.9164059444661713, + "grad_norm": 10.833339740113667, + "learning_rate": 5e-05, + "loss": 0.048, + "num_input_tokens_seen": 908770908, + "step": 9373 + }, + { + "epoch": 0.9164059444661713, + "loss": 0.0527988076210022, + "loss_ce": 0.003276408649981022, + "loss_iou": 0.35546875, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 908770908, + "step": 9373 + }, + { + "epoch": 0.9165037152913571, + "grad_norm": 2.8352274365918784, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 908867224, + "step": 9374 + }, + { + "epoch": 0.9165037152913571, + "loss": 0.09986744821071625, + "loss_ce": 0.0057054562494158745, + "loss_iou": 0.28125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 908867224, + "step": 9374 + }, + { + "epoch": 0.9166014861165428, + "grad_norm": 5.920323784053209, + "learning_rate": 5e-05, + "loss": 0.0477, + "num_input_tokens_seen": 908964884, + "step": 9375 + }, + { + "epoch": 0.9166014861165428, + "loss": 0.038971103727817535, + "loss_ce": 0.00416580680757761, + "loss_iou": 0.328125, + "loss_num": 0.0069580078125, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 908964884, + "step": 9375 + }, + { + "epoch": 0.9166992569417286, + "grad_norm": 4.5359825226438195, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 909061168, + "step": 9376 + }, + { + "epoch": 0.9166992569417286, + "loss": 0.08655674010515213, + "loss_ce": 0.002581903710961342, + "loss_iou": 0.259765625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 909061168, + "step": 9376 + }, + { + "epoch": 0.9167970277669144, + "grad_norm": 8.877609405998527, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 909158056, + "step": 9377 + }, + { + "epoch": 0.9167970277669144, + "loss": 0.08838765323162079, + "loss_ce": 0.007424520328640938, + "loss_iou": 0.279296875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 909158056, + "step": 9377 + }, + { + "epoch": 0.9168947985921001, + "grad_norm": 7.987455711317746, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 909255212, + "step": 9378 + }, + { + "epoch": 0.9168947985921001, + "loss": 0.08587951958179474, + "loss_ce": 0.004962158389389515, + "loss_iou": 0.287109375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 909255212, + "step": 9378 + }, + { + "epoch": 0.9169925694172859, + "grad_norm": 7.427435971011941, + "learning_rate": 5e-05, + "loss": 0.0911, + "num_input_tokens_seen": 909352076, + "step": 9379 + }, + { + "epoch": 0.9169925694172859, + "loss": 0.08152840286493301, + "loss_ce": 0.003502584295347333, + "loss_iou": 0.333984375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 909352076, + "step": 9379 + }, + { + "epoch": 0.9170903402424716, + "grad_norm": 7.568782216363236, + "learning_rate": 5e-05, + "loss": 0.1152, + "num_input_tokens_seen": 909448580, + "step": 9380 + }, + { + "epoch": 0.9170903402424716, + "loss": 0.1663757562637329, + "loss_ce": 0.0044189682230353355, + "loss_iou": 0.2109375, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 909448580, + "step": 9380 + }, + { + "epoch": 0.9171881110676574, + "grad_norm": 6.136278426556634, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 909545644, + "step": 9381 + }, + { + "epoch": 0.9171881110676574, + "loss": 0.07880514860153198, + "loss_ce": 0.0024501641746610403, + "loss_iou": 0.333984375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 909545644, + "step": 9381 + }, + { + "epoch": 0.9172858818928432, + "grad_norm": 4.100610607744663, + "learning_rate": 5e-05, + "loss": 0.0643, + "num_input_tokens_seen": 909640312, + "step": 9382 + }, + { + "epoch": 0.9172858818928432, + "loss": 0.03718097507953644, + "loss_ce": 0.0067778402008116245, + "loss_iou": 0.21484375, + "loss_num": 0.006072998046875, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 909640312, + "step": 9382 + }, + { + "epoch": 0.9173836527180289, + "grad_norm": 6.4146127721839274, + "learning_rate": 5e-05, + "loss": 0.0483, + "num_input_tokens_seen": 909737508, + "step": 9383 + }, + { + "epoch": 0.9173836527180289, + "loss": 0.05207998305559158, + "loss_ce": 0.004091092385351658, + "loss_iou": 0.30859375, + "loss_num": 0.00958251953125, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 909737508, + "step": 9383 + }, + { + "epoch": 0.9174814235432147, + "grad_norm": 16.00936900384052, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 909834588, + "step": 9384 + }, + { + "epoch": 0.9174814235432147, + "loss": 0.08776520192623138, + "loss_ce": 0.006794440560042858, + "loss_iou": 0.302734375, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 909834588, + "step": 9384 + }, + { + "epoch": 0.9175791943684005, + "grad_norm": 15.312201587083432, + "learning_rate": 5e-05, + "loss": 0.0485, + "num_input_tokens_seen": 909931788, + "step": 9385 + }, + { + "epoch": 0.9175791943684005, + "loss": 0.04626084864139557, + "loss_ce": 0.0032615812961012125, + "loss_iou": 0.28515625, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 909931788, + "step": 9385 + }, + { + "epoch": 0.9176769651935862, + "grad_norm": 6.211069951515345, + "learning_rate": 5e-05, + "loss": 0.075, + "num_input_tokens_seen": 910028924, + "step": 9386 + }, + { + "epoch": 0.9176769651935862, + "loss": 0.05575544759631157, + "loss_ce": 0.005889723543077707, + "loss_iou": 0.3125, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 910028924, + "step": 9386 + }, + { + "epoch": 0.917774736018772, + "grad_norm": 12.405075594394527, + "learning_rate": 5e-05, + "loss": 0.0575, + "num_input_tokens_seen": 910124616, + "step": 9387 + }, + { + "epoch": 0.917774736018772, + "loss": 0.06453397125005722, + "loss_ce": 0.0034177510533481836, + "loss_iou": 0.1923828125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 910124616, + "step": 9387 + }, + { + "epoch": 0.9178725068439577, + "grad_norm": 3.4097970239395745, + "learning_rate": 5e-05, + "loss": 0.0534, + "num_input_tokens_seen": 910220720, + "step": 9388 + }, + { + "epoch": 0.9178725068439577, + "loss": 0.04768998175859451, + "loss_ce": 0.004444669000804424, + "loss_iou": 0.17578125, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 910220720, + "step": 9388 + }, + { + "epoch": 0.9179702776691435, + "grad_norm": 3.395709904462022, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 910318228, + "step": 9389 + }, + { + "epoch": 0.9179702776691435, + "loss": 0.0777505412697792, + "loss_ce": 0.0043099913746118546, + "loss_iou": 0.29296875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 910318228, + "step": 9389 + }, + { + "epoch": 0.9180680484943293, + "grad_norm": 12.337519865582939, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 910415696, + "step": 9390 + }, + { + "epoch": 0.9180680484943293, + "loss": 0.06423011422157288, + "loss_ce": 0.005346442572772503, + "loss_iou": 0.27734375, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 910415696, + "step": 9390 + }, + { + "epoch": 0.918165819319515, + "grad_norm": 9.737484384694666, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 910512024, + "step": 9391 + }, + { + "epoch": 0.918165819319515, + "loss": 0.05098733678460121, + "loss_ce": 0.00550088519230485, + "loss_iou": 0.375, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 910512024, + "step": 9391 + }, + { + "epoch": 0.9182635901447008, + "grad_norm": 14.885940683505016, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 910608520, + "step": 9392 + }, + { + "epoch": 0.9182635901447008, + "loss": 0.09121472388505936, + "loss_ce": 0.0054298085160553455, + "loss_iou": 0.30078125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 910608520, + "step": 9392 + }, + { + "epoch": 0.9183613609698866, + "grad_norm": 7.895774179585189, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 910705512, + "step": 9393 + }, + { + "epoch": 0.9183613609698866, + "loss": 0.06610480695962906, + "loss_ce": 0.00481024943292141, + "loss_iou": 0.267578125, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 910705512, + "step": 9393 + }, + { + "epoch": 0.9184591317950723, + "grad_norm": 9.935621514493574, + "learning_rate": 5e-05, + "loss": 0.0851, + "num_input_tokens_seen": 910802464, + "step": 9394 + }, + { + "epoch": 0.9184591317950723, + "loss": 0.07039576768875122, + "loss_ce": 0.003478347323834896, + "loss_iou": 0.19921875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 910802464, + "step": 9394 + }, + { + "epoch": 0.9185569026202581, + "grad_norm": 10.43966565716952, + "learning_rate": 5e-05, + "loss": 0.0607, + "num_input_tokens_seen": 910899312, + "step": 9395 + }, + { + "epoch": 0.9185569026202581, + "loss": 0.07107327878475189, + "loss_ce": 0.0018288912251591682, + "loss_iou": 0.330078125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 910899312, + "step": 9395 + }, + { + "epoch": 0.9186546734454438, + "grad_norm": 4.4500324177594255, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 910997076, + "step": 9396 + }, + { + "epoch": 0.9186546734454438, + "loss": 0.07871280610561371, + "loss_ce": 0.0006030646618455648, + "loss_iou": 0.337890625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 910997076, + "step": 9396 + }, + { + "epoch": 0.9187524442706296, + "grad_norm": 3.7278430562508262, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 911093364, + "step": 9397 + }, + { + "epoch": 0.9187524442706296, + "loss": 0.06279964745044708, + "loss_ce": 0.007166101597249508, + "loss_iou": 0.1962890625, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 911093364, + "step": 9397 + }, + { + "epoch": 0.9188502150958154, + "grad_norm": 6.512553505908586, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 911190232, + "step": 9398 + }, + { + "epoch": 0.9188502150958154, + "loss": 0.07218489050865173, + "loss_ce": 0.003550858935341239, + "loss_iou": 0.294921875, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 911190232, + "step": 9398 + }, + { + "epoch": 0.9189479859210011, + "grad_norm": 4.109612391744781, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 911288492, + "step": 9399 + }, + { + "epoch": 0.9189479859210011, + "loss": 0.06664861738681793, + "loss_ce": 0.005430352874100208, + "loss_iou": 0.306640625, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 911288492, + "step": 9399 + }, + { + "epoch": 0.9190457567461869, + "grad_norm": 41.30829679629443, + "learning_rate": 5e-05, + "loss": 0.0675, + "num_input_tokens_seen": 911384928, + "step": 9400 + }, + { + "epoch": 0.9190457567461869, + "loss": 0.08458516001701355, + "loss_ce": 0.004728290252387524, + "loss_iou": 0.2578125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 911384928, + "step": 9400 + }, + { + "epoch": 0.9191435275713727, + "grad_norm": 45.7074449844194, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 911482308, + "step": 9401 + }, + { + "epoch": 0.9191435275713727, + "loss": 0.090795136988163, + "loss_ce": 0.004033662378787994, + "loss_iou": 0.283203125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 911482308, + "step": 9401 + }, + { + "epoch": 0.9192412983965584, + "grad_norm": 7.036962737072379, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 911579712, + "step": 9402 + }, + { + "epoch": 0.9192412983965584, + "loss": 0.11001914739608765, + "loss_ce": 0.007003248669207096, + "loss_iou": 0.3359375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 911579712, + "step": 9402 + }, + { + "epoch": 0.9193390692217442, + "grad_norm": 17.84707871273227, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 911677076, + "step": 9403 + }, + { + "epoch": 0.9193390692217442, + "loss": 0.0726776197552681, + "loss_ce": 0.004371653310954571, + "loss_iou": 0.322265625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 911677076, + "step": 9403 + }, + { + "epoch": 0.91943684004693, + "grad_norm": 9.618872052744125, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 911773708, + "step": 9404 + }, + { + "epoch": 0.91943684004693, + "loss": 0.06302434206008911, + "loss_ce": 0.0034845443442463875, + "loss_iou": 0.28515625, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 911773708, + "step": 9404 + }, + { + "epoch": 0.9195346108721157, + "grad_norm": 4.02291625908262, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 911871308, + "step": 9405 + }, + { + "epoch": 0.9195346108721157, + "loss": 0.0551455095410347, + "loss_ce": 0.004978427197784185, + "loss_iou": 0.28125, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 911871308, + "step": 9405 + }, + { + "epoch": 0.9196323816973015, + "grad_norm": 9.403744802516767, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 911968544, + "step": 9406 + }, + { + "epoch": 0.9196323816973015, + "loss": 0.05263500660657883, + "loss_ce": 0.004306608345359564, + "loss_iou": 0.203125, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 911968544, + "step": 9406 + }, + { + "epoch": 0.9197301525224872, + "grad_norm": 6.834580631132868, + "learning_rate": 5e-05, + "loss": 0.0508, + "num_input_tokens_seen": 912065204, + "step": 9407 + }, + { + "epoch": 0.9197301525224872, + "loss": 0.05731099098920822, + "loss_ce": 0.0025777146220207214, + "loss_iou": 0.275390625, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 912065204, + "step": 9407 + }, + { + "epoch": 0.919827923347673, + "grad_norm": 6.037295195108805, + "learning_rate": 5e-05, + "loss": 0.0947, + "num_input_tokens_seen": 912163356, + "step": 9408 + }, + { + "epoch": 0.919827923347673, + "loss": 0.08627988398075104, + "loss_ce": 0.010443706065416336, + "loss_iou": 0.314453125, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 912163356, + "step": 9408 + }, + { + "epoch": 0.9199256941728589, + "grad_norm": 5.92070705686092, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 912260336, + "step": 9409 + }, + { + "epoch": 0.9199256941728589, + "loss": 0.0802459567785263, + "loss_ce": 0.004806506913155317, + "loss_iou": 0.23828125, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 912260336, + "step": 9409 + }, + { + "epoch": 0.9200234649980445, + "grad_norm": 2.643234872773141, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 912357600, + "step": 9410 + }, + { + "epoch": 0.9200234649980445, + "loss": 0.08306235820055008, + "loss_ce": 0.004197307862341404, + "loss_iou": 0.255859375, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 912357600, + "step": 9410 + }, + { + "epoch": 0.9201212358232304, + "grad_norm": 7.724370293279072, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 912454744, + "step": 9411 + }, + { + "epoch": 0.9201212358232304, + "loss": 0.06041748449206352, + "loss_ce": 0.0022052081767469645, + "loss_iou": 0.2578125, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 912454744, + "step": 9411 + }, + { + "epoch": 0.9202190066484162, + "grad_norm": 5.408968444039239, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 912552244, + "step": 9412 + }, + { + "epoch": 0.9202190066484162, + "loss": 0.040140118449926376, + "loss_ce": 0.0032176615204662085, + "loss_iou": 0.2236328125, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 912552244, + "step": 9412 + }, + { + "epoch": 0.9203167774736019, + "grad_norm": 12.505185800498593, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 912649540, + "step": 9413 + }, + { + "epoch": 0.9203167774736019, + "loss": 0.06348647177219391, + "loss_ce": 0.003977195359766483, + "loss_iou": 0.298828125, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 912649540, + "step": 9413 + }, + { + "epoch": 0.9204145482987877, + "grad_norm": 3.5252546333662025, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 912747360, + "step": 9414 + }, + { + "epoch": 0.9204145482987877, + "loss": 0.06962118297815323, + "loss_ce": 0.0038481769151985645, + "loss_iou": 0.267578125, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 912747360, + "step": 9414 + }, + { + "epoch": 0.9205123191239734, + "grad_norm": 9.98847288739053, + "learning_rate": 5e-05, + "loss": 0.1067, + "num_input_tokens_seen": 912844000, + "step": 9415 + }, + { + "epoch": 0.9205123191239734, + "loss": 0.08369514346122742, + "loss_ce": 0.009079662151634693, + "loss_iou": 0.287109375, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 912844000, + "step": 9415 + }, + { + "epoch": 0.9206100899491592, + "grad_norm": 4.835593944925329, + "learning_rate": 5e-05, + "loss": 0.0651, + "num_input_tokens_seen": 912941256, + "step": 9416 + }, + { + "epoch": 0.9206100899491592, + "loss": 0.05225016921758652, + "loss_ce": 0.0026057029608637094, + "loss_iou": 0.435546875, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 912941256, + "step": 9416 + }, + { + "epoch": 0.920707860774345, + "grad_norm": 5.579640578682614, + "learning_rate": 5e-05, + "loss": 0.0999, + "num_input_tokens_seen": 913037824, + "step": 9417 + }, + { + "epoch": 0.920707860774345, + "loss": 0.08471730351448059, + "loss_ce": 0.008591212332248688, + "loss_iou": 0.3125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 913037824, + "step": 9417 + }, + { + "epoch": 0.9208056315995307, + "grad_norm": 15.640138660171047, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 913134408, + "step": 9418 + }, + { + "epoch": 0.9208056315995307, + "loss": 0.07752680778503418, + "loss_ce": 0.00549006462097168, + "loss_iou": 0.2421875, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 913134408, + "step": 9418 + }, + { + "epoch": 0.9209034024247165, + "grad_norm": 3.2373212180340674, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 913232208, + "step": 9419 + }, + { + "epoch": 0.9209034024247165, + "loss": 0.06890149414539337, + "loss_ce": 0.006615116260945797, + "loss_iou": 0.36328125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 913232208, + "step": 9419 + }, + { + "epoch": 0.9210011732499023, + "grad_norm": 28.712313458301615, + "learning_rate": 5e-05, + "loss": 0.0501, + "num_input_tokens_seen": 913328804, + "step": 9420 + }, + { + "epoch": 0.9210011732499023, + "loss": 0.038538672029972076, + "loss_ce": 0.009409643709659576, + "loss_iou": 0.240234375, + "loss_num": 0.005828857421875, + "loss_xval": 0.0291748046875, + "num_input_tokens_seen": 913328804, + "step": 9420 + }, + { + "epoch": 0.921098944075088, + "grad_norm": 41.82238458977634, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 913425940, + "step": 9421 + }, + { + "epoch": 0.921098944075088, + "loss": 0.06549777090549469, + "loss_ce": 0.00501193106174469, + "loss_iou": 0.365234375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 913425940, + "step": 9421 + }, + { + "epoch": 0.9211967149002738, + "grad_norm": 12.766228370307996, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 913522720, + "step": 9422 + }, + { + "epoch": 0.9211967149002738, + "loss": 0.09517937898635864, + "loss_ce": 0.00924188643693924, + "loss_iou": 0.2412109375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 913522720, + "step": 9422 + }, + { + "epoch": 0.9212944857254596, + "grad_norm": 8.841368215059733, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 913619052, + "step": 9423 + }, + { + "epoch": 0.9212944857254596, + "loss": 0.11232632398605347, + "loss_ce": 0.005034157074987888, + "loss_iou": 0.251953125, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 913619052, + "step": 9423 + }, + { + "epoch": 0.9213922565506453, + "grad_norm": 33.74045078596793, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 913716420, + "step": 9424 + }, + { + "epoch": 0.9213922565506453, + "loss": 0.05271291732788086, + "loss_ce": 0.00953817367553711, + "loss_iou": 0.25390625, + "loss_num": 0.00860595703125, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 913716420, + "step": 9424 + }, + { + "epoch": 0.9214900273758311, + "grad_norm": 17.33820977394779, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 913814204, + "step": 9425 + }, + { + "epoch": 0.9214900273758311, + "loss": 0.08622832596302032, + "loss_ce": 0.009354542009532452, + "loss_iou": 0.302734375, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 913814204, + "step": 9425 + }, + { + "epoch": 0.9215877982010168, + "grad_norm": 11.69102274891693, + "learning_rate": 5e-05, + "loss": 0.0946, + "num_input_tokens_seen": 913911240, + "step": 9426 + }, + { + "epoch": 0.9215877982010168, + "loss": 0.08962243795394897, + "loss_ce": 0.005200326442718506, + "loss_iou": 0.263671875, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 913911240, + "step": 9426 + }, + { + "epoch": 0.9216855690262026, + "grad_norm": 5.657020461572075, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 914006816, + "step": 9427 + }, + { + "epoch": 0.9216855690262026, + "loss": 0.0535043403506279, + "loss_ce": 0.004939429461956024, + "loss_iou": 0.203125, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 914006816, + "step": 9427 + }, + { + "epoch": 0.9217833398513884, + "grad_norm": 87.35175144650653, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 914104128, + "step": 9428 + }, + { + "epoch": 0.9217833398513884, + "loss": 0.09072868525981903, + "loss_ce": 0.005020059645175934, + "loss_iou": 0.380859375, + "loss_num": 0.01708984375, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 914104128, + "step": 9428 + }, + { + "epoch": 0.9218811106765741, + "grad_norm": 11.657048133699515, + "learning_rate": 5e-05, + "loss": 0.1088, + "num_input_tokens_seen": 914201076, + "step": 9429 + }, + { + "epoch": 0.9218811106765741, + "loss": 0.14802393317222595, + "loss_ce": 0.01137385331094265, + "loss_iou": 0.2109375, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 914201076, + "step": 9429 + }, + { + "epoch": 0.9219788815017599, + "grad_norm": 127.33826480306158, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 914298224, + "step": 9430 + }, + { + "epoch": 0.9219788815017599, + "loss": 0.07395732402801514, + "loss_ce": 0.006925465539097786, + "loss_iou": 0.2294921875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 914298224, + "step": 9430 + }, + { + "epoch": 0.9220766523269457, + "grad_norm": 22.50553694325428, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 914395544, + "step": 9431 + }, + { + "epoch": 0.9220766523269457, + "loss": 0.09864717721939087, + "loss_ce": 0.005377823021262884, + "loss_iou": 0.25390625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 914395544, + "step": 9431 + }, + { + "epoch": 0.9221744231521314, + "grad_norm": 9.549908760188048, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 914491576, + "step": 9432 + }, + { + "epoch": 0.9221744231521314, + "loss": 0.08075308799743652, + "loss_ce": 0.0021016616374254227, + "loss_iou": 0.265625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 914491576, + "step": 9432 + }, + { + "epoch": 0.9222721939773172, + "grad_norm": 7.480194087903953, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 914587764, + "step": 9433 + }, + { + "epoch": 0.9222721939773172, + "loss": 0.11148960143327713, + "loss_ce": 0.008439372293651104, + "loss_iou": 0.3046875, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 914587764, + "step": 9433 + }, + { + "epoch": 0.9223699648025029, + "grad_norm": 15.85030963326036, + "learning_rate": 5e-05, + "loss": 0.0476, + "num_input_tokens_seen": 914684552, + "step": 9434 + }, + { + "epoch": 0.9223699648025029, + "loss": 0.029907820746302605, + "loss_ce": 0.0031400902662426233, + "loss_iou": 0.2470703125, + "loss_num": 0.005340576171875, + "loss_xval": 0.0267333984375, + "num_input_tokens_seen": 914684552, + "step": 9434 + }, + { + "epoch": 0.9224677356276887, + "grad_norm": 7.277249233379738, + "learning_rate": 5e-05, + "loss": 0.0944, + "num_input_tokens_seen": 914780772, + "step": 9435 + }, + { + "epoch": 0.9224677356276887, + "loss": 0.0964459553360939, + "loss_ce": 0.008257780224084854, + "loss_iou": 0.173828125, + "loss_num": 0.017578125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 914780772, + "step": 9435 + }, + { + "epoch": 0.9225655064528745, + "grad_norm": 3.18668134538502, + "learning_rate": 5e-05, + "loss": 0.0543, + "num_input_tokens_seen": 914878228, + "step": 9436 + }, + { + "epoch": 0.9225655064528745, + "loss": 0.05971498787403107, + "loss_ce": 0.006843286100775003, + "loss_iou": 0.263671875, + "loss_num": 0.01055908203125, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 914878228, + "step": 9436 + }, + { + "epoch": 0.9226632772780602, + "grad_norm": 3.404928958123413, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 914976112, + "step": 9437 + }, + { + "epoch": 0.9226632772780602, + "loss": 0.08807697892189026, + "loss_ce": 0.005099678412079811, + "loss_iou": 0.232421875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 914976112, + "step": 9437 + }, + { + "epoch": 0.922761048103246, + "grad_norm": 6.048883028308181, + "learning_rate": 5e-05, + "loss": 0.1121, + "num_input_tokens_seen": 915074276, + "step": 9438 + }, + { + "epoch": 0.922761048103246, + "loss": 0.12425579875707626, + "loss_ce": 0.007526060566306114, + "loss_iou": 0.33984375, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 915074276, + "step": 9438 + }, + { + "epoch": 0.9228588189284318, + "grad_norm": 3.8604968009751466, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 915171348, + "step": 9439 + }, + { + "epoch": 0.9228588189284318, + "loss": 0.0738469809293747, + "loss_ce": 0.002168822567909956, + "loss_iou": 0.2373046875, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 915171348, + "step": 9439 + }, + { + "epoch": 0.9229565897536175, + "grad_norm": 8.431548242849331, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 915267572, + "step": 9440 + }, + { + "epoch": 0.9229565897536175, + "loss": 0.06100130081176758, + "loss_ce": 0.009274007752537727, + "loss_iou": 0.326171875, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 915267572, + "step": 9440 + }, + { + "epoch": 0.9230543605788033, + "grad_norm": 5.712660772644951, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 915365640, + "step": 9441 + }, + { + "epoch": 0.9230543605788033, + "loss": 0.08277326822280884, + "loss_ce": 0.004190498031675816, + "loss_iou": 0.318359375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 915365640, + "step": 9441 + }, + { + "epoch": 0.923152131403989, + "grad_norm": 16.28949294249946, + "learning_rate": 5e-05, + "loss": 0.0591, + "num_input_tokens_seen": 915462776, + "step": 9442 + }, + { + "epoch": 0.923152131403989, + "loss": 0.0603853315114975, + "loss_ce": 0.0022188248112797737, + "loss_iou": 0.28125, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 915462776, + "step": 9442 + }, + { + "epoch": 0.9232499022291748, + "grad_norm": 3.9064949398220423, + "learning_rate": 5e-05, + "loss": 0.0589, + "num_input_tokens_seen": 915560332, + "step": 9443 + }, + { + "epoch": 0.9232499022291748, + "loss": 0.07256615161895752, + "loss_ce": 0.004344110377132893, + "loss_iou": 0.302734375, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 915560332, + "step": 9443 + }, + { + "epoch": 0.9233476730543606, + "grad_norm": 2.431105081438114, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 915658212, + "step": 9444 + }, + { + "epoch": 0.9233476730543606, + "loss": 0.05004587024450302, + "loss_ce": 0.0035523404367268085, + "loss_iou": 0.2451171875, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 915658212, + "step": 9444 + }, + { + "epoch": 0.9234454438795463, + "grad_norm": 26.214330697246496, + "learning_rate": 5e-05, + "loss": 0.0708, + "num_input_tokens_seen": 915755600, + "step": 9445 + }, + { + "epoch": 0.9234454438795463, + "loss": 0.06085069477558136, + "loss_ce": 0.003431871999055147, + "loss_iou": 0.2119140625, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 915755600, + "step": 9445 + }, + { + "epoch": 0.9235432147047321, + "grad_norm": 4.049831371796082, + "learning_rate": 5e-05, + "loss": 0.0542, + "num_input_tokens_seen": 915852480, + "step": 9446 + }, + { + "epoch": 0.9235432147047321, + "loss": 0.04775366187095642, + "loss_ce": 0.003373476443812251, + "loss_iou": 0.25390625, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 915852480, + "step": 9446 + }, + { + "epoch": 0.9236409855299179, + "grad_norm": 3.7460939304474556, + "learning_rate": 5e-05, + "loss": 0.0615, + "num_input_tokens_seen": 915948880, + "step": 9447 + }, + { + "epoch": 0.9236409855299179, + "loss": 0.0481458343565464, + "loss_ce": 0.007786340080201626, + "loss_iou": 0.32421875, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 915948880, + "step": 9447 + }, + { + "epoch": 0.9237387563551036, + "grad_norm": 10.170002298000698, + "learning_rate": 5e-05, + "loss": 0.0942, + "num_input_tokens_seen": 916045004, + "step": 9448 + }, + { + "epoch": 0.9237387563551036, + "loss": 0.07904615253210068, + "loss_ce": 0.0034846251364797354, + "loss_iou": 0.32421875, + "loss_num": 0.01507568359375, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 916045004, + "step": 9448 + }, + { + "epoch": 0.9238365271802894, + "grad_norm": 2.7674211214738396, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 916141276, + "step": 9449 + }, + { + "epoch": 0.9238365271802894, + "loss": 0.07663232088088989, + "loss_ce": 0.009150328114628792, + "loss_iou": 0.181640625, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 916141276, + "step": 9449 + }, + { + "epoch": 0.9239342980054752, + "grad_norm": 13.830015252474482, + "learning_rate": 5e-05, + "loss": 0.0479, + "num_input_tokens_seen": 916237872, + "step": 9450 + }, + { + "epoch": 0.9239342980054752, + "loss": 0.04819750785827637, + "loss_ce": 0.006983520463109016, + "loss_iou": 0.220703125, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 916237872, + "step": 9450 + }, + { + "epoch": 0.9240320688306609, + "grad_norm": 2.1735958151997834, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 916335092, + "step": 9451 + }, + { + "epoch": 0.9240320688306609, + "loss": 0.07811028510332108, + "loss_ce": 0.005226678214967251, + "loss_iou": 0.30859375, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 916335092, + "step": 9451 + }, + { + "epoch": 0.9241298396558467, + "grad_norm": 3.6181839087603946, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 916432108, + "step": 9452 + }, + { + "epoch": 0.9241298396558467, + "loss": 0.07118847221136093, + "loss_ce": 0.007055782712996006, + "loss_iou": 0.30078125, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 916432108, + "step": 9452 + }, + { + "epoch": 0.9242276104810324, + "grad_norm": 7.075336950616634, + "learning_rate": 5e-05, + "loss": 0.0705, + "num_input_tokens_seen": 916528844, + "step": 9453 + }, + { + "epoch": 0.9242276104810324, + "loss": 0.07184161245822906, + "loss_ce": 0.002032645046710968, + "loss_iou": 0.318359375, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 916528844, + "step": 9453 + }, + { + "epoch": 0.9243253813062182, + "grad_norm": 17.569872292055134, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 916625372, + "step": 9454 + }, + { + "epoch": 0.9243253813062182, + "loss": 0.05430970713496208, + "loss_ce": 0.004749160725623369, + "loss_iou": 0.279296875, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 916625372, + "step": 9454 + }, + { + "epoch": 0.924423152131404, + "grad_norm": 9.466188393298893, + "learning_rate": 5e-05, + "loss": 0.0539, + "num_input_tokens_seen": 916721716, + "step": 9455 + }, + { + "epoch": 0.924423152131404, + "loss": 0.07105210423469543, + "loss_ce": 0.0039134337566792965, + "loss_iou": 0.2734375, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 916721716, + "step": 9455 + }, + { + "epoch": 0.9245209229565897, + "grad_norm": 15.088259170360793, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 916817988, + "step": 9456 + }, + { + "epoch": 0.9245209229565897, + "loss": 0.08833843469619751, + "loss_ce": 0.00522381579503417, + "loss_iou": 0.25, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 916817988, + "step": 9456 + }, + { + "epoch": 0.9246186937817755, + "grad_norm": 26.37695418499205, + "learning_rate": 5e-05, + "loss": 0.0821, + "num_input_tokens_seen": 916915132, + "step": 9457 + }, + { + "epoch": 0.9246186937817755, + "loss": 0.07081635296344757, + "loss_ce": 0.007797555532306433, + "loss_iou": 0.41796875, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 916915132, + "step": 9457 + }, + { + "epoch": 0.9247164646069613, + "grad_norm": 12.175475816013769, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 917011568, + "step": 9458 + }, + { + "epoch": 0.9247164646069613, + "loss": 0.1317676454782486, + "loss_ce": 0.005340952426195145, + "loss_iou": 0.32421875, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 917011568, + "step": 9458 + }, + { + "epoch": 0.924814235432147, + "grad_norm": 4.313475940804357, + "learning_rate": 5e-05, + "loss": 0.0492, + "num_input_tokens_seen": 917107760, + "step": 9459 + }, + { + "epoch": 0.924814235432147, + "loss": 0.05736852437257767, + "loss_ce": 0.016856439411640167, + "loss_iou": 0.2734375, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 917107760, + "step": 9459 + }, + { + "epoch": 0.9249120062573328, + "grad_norm": 2.7960768142257617, + "learning_rate": 5e-05, + "loss": 0.0905, + "num_input_tokens_seen": 917204860, + "step": 9460 + }, + { + "epoch": 0.9249120062573328, + "loss": 0.08615818619728088, + "loss_ce": 0.006324203684926033, + "loss_iou": 0.2216796875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 917204860, + "step": 9460 + }, + { + "epoch": 0.9250097770825185, + "grad_norm": 16.00192824627718, + "learning_rate": 5e-05, + "loss": 0.0643, + "num_input_tokens_seen": 917301156, + "step": 9461 + }, + { + "epoch": 0.9250097770825185, + "loss": 0.061725083738565445, + "loss_ce": 0.003642500378191471, + "loss_iou": 0.2333984375, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 917301156, + "step": 9461 + }, + { + "epoch": 0.9251075479077043, + "grad_norm": 12.680116910740148, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 917398388, + "step": 9462 + }, + { + "epoch": 0.9251075479077043, + "loss": 0.09146155416965485, + "loss_ce": 0.0038837315514683723, + "loss_iou": 0.36328125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 917398388, + "step": 9462 + }, + { + "epoch": 0.9252053187328901, + "grad_norm": 25.539225264817617, + "learning_rate": 5e-05, + "loss": 0.1307, + "num_input_tokens_seen": 917496072, + "step": 9463 + }, + { + "epoch": 0.9252053187328901, + "loss": 0.13425518572330475, + "loss_ce": 0.004296085797250271, + "loss_iou": 0.349609375, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 917496072, + "step": 9463 + }, + { + "epoch": 0.9253030895580758, + "grad_norm": 9.917424437882515, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 917592956, + "step": 9464 + }, + { + "epoch": 0.9253030895580758, + "loss": 0.08139476180076599, + "loss_ce": 0.006283374037593603, + "loss_iou": 0.205078125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 917592956, + "step": 9464 + }, + { + "epoch": 0.9254008603832616, + "grad_norm": 3.618217263588995, + "learning_rate": 5e-05, + "loss": 0.0763, + "num_input_tokens_seen": 917690384, + "step": 9465 + }, + { + "epoch": 0.9254008603832616, + "loss": 0.08201789855957031, + "loss_ce": 0.006090159993618727, + "loss_iou": 0.28515625, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 917690384, + "step": 9465 + }, + { + "epoch": 0.9254986312084474, + "grad_norm": 4.619061749170003, + "learning_rate": 5e-05, + "loss": 0.0559, + "num_input_tokens_seen": 917788472, + "step": 9466 + }, + { + "epoch": 0.9254986312084474, + "loss": 0.06041726469993591, + "loss_ce": 0.005134673789143562, + "loss_iou": 0.365234375, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 917788472, + "step": 9466 + }, + { + "epoch": 0.9255964020336331, + "grad_norm": 5.178117586997116, + "learning_rate": 5e-05, + "loss": 0.1073, + "num_input_tokens_seen": 917885152, + "step": 9467 + }, + { + "epoch": 0.9255964020336331, + "loss": 0.11154555529356003, + "loss_ce": 0.003437041537836194, + "loss_iou": 0.287109375, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 917885152, + "step": 9467 + }, + { + "epoch": 0.9256941728588189, + "grad_norm": 7.581631625066062, + "learning_rate": 5e-05, + "loss": 0.0607, + "num_input_tokens_seen": 917981700, + "step": 9468 + }, + { + "epoch": 0.9256941728588189, + "loss": 0.05789832025766373, + "loss_ce": 0.002321997657418251, + "loss_iou": 0.255859375, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 917981700, + "step": 9468 + }, + { + "epoch": 0.9257919436840047, + "grad_norm": 6.030435697740238, + "learning_rate": 5e-05, + "loss": 0.049, + "num_input_tokens_seen": 918078532, + "step": 9469 + }, + { + "epoch": 0.9257919436840047, + "loss": 0.047885529696941376, + "loss_ce": 0.004825226962566376, + "loss_iou": 0.28515625, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 918078532, + "step": 9469 + }, + { + "epoch": 0.9258897145091904, + "grad_norm": 3.997752147470322, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 918176096, + "step": 9470 + }, + { + "epoch": 0.9258897145091904, + "loss": 0.07240670919418335, + "loss_ce": 0.0038184495642781258, + "loss_iou": 0.28125, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 918176096, + "step": 9470 + }, + { + "epoch": 0.9259874853343762, + "grad_norm": 4.306124315374363, + "learning_rate": 5e-05, + "loss": 0.0527, + "num_input_tokens_seen": 918273496, + "step": 9471 + }, + { + "epoch": 0.9259874853343762, + "loss": 0.04975813627243042, + "loss_ce": 0.0055229077115654945, + "loss_iou": 0.294921875, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 918273496, + "step": 9471 + }, + { + "epoch": 0.9260852561595619, + "grad_norm": 7.403810499661762, + "learning_rate": 5e-05, + "loss": 0.0572, + "num_input_tokens_seen": 918370060, + "step": 9472 + }, + { + "epoch": 0.9260852561595619, + "loss": 0.06645442545413971, + "loss_ce": 0.005510827526450157, + "loss_iou": 0.267578125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 918370060, + "step": 9472 + }, + { + "epoch": 0.9261830269847477, + "grad_norm": 9.719086355067954, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 918467088, + "step": 9473 + }, + { + "epoch": 0.9261830269847477, + "loss": 0.12777341902256012, + "loss_ce": 0.007122166454792023, + "loss_iou": 0.333984375, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 918467088, + "step": 9473 + }, + { + "epoch": 0.9262807978099336, + "grad_norm": 3.0853378609986533, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 918564268, + "step": 9474 + }, + { + "epoch": 0.9262807978099336, + "loss": 0.1064341813325882, + "loss_ce": 0.0060160793364048, + "loss_iou": 0.2138671875, + "loss_num": 0.02001953125, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 918564268, + "step": 9474 + }, + { + "epoch": 0.9263785686351192, + "grad_norm": 5.689903841496528, + "learning_rate": 5e-05, + "loss": 0.0519, + "num_input_tokens_seen": 918661700, + "step": 9475 + }, + { + "epoch": 0.9263785686351192, + "loss": 0.04641842842102051, + "loss_ce": 0.004327060654759407, + "loss_iou": 0.27734375, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 918661700, + "step": 9475 + }, + { + "epoch": 0.926476339460305, + "grad_norm": 2.900210623180661, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 918758860, + "step": 9476 + }, + { + "epoch": 0.926476339460305, + "loss": 0.050449155271053314, + "loss_ce": 0.005527284927666187, + "loss_iou": 0.287109375, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 918758860, + "step": 9476 + }, + { + "epoch": 0.9265741102854909, + "grad_norm": 3.4371186563477054, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 918856392, + "step": 9477 + }, + { + "epoch": 0.9265741102854909, + "loss": 0.10921081900596619, + "loss_ce": 0.0029943850822746754, + "loss_iou": 0.31640625, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 918856392, + "step": 9477 + }, + { + "epoch": 0.9266718811106766, + "grad_norm": 4.101221357181845, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 918952996, + "step": 9478 + }, + { + "epoch": 0.9266718811106766, + "loss": 0.12380541861057281, + "loss_ce": 0.003474604804068804, + "loss_iou": 0.251953125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 918952996, + "step": 9478 + }, + { + "epoch": 0.9267696519358624, + "grad_norm": 7.828188309725851, + "learning_rate": 5e-05, + "loss": 0.0528, + "num_input_tokens_seen": 919049604, + "step": 9479 + }, + { + "epoch": 0.9267696519358624, + "loss": 0.040532343089580536, + "loss_ce": 0.008504144847393036, + "loss_iou": 0.2392578125, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 919049604, + "step": 9479 + }, + { + "epoch": 0.926867422761048, + "grad_norm": 3.572116169339408, + "learning_rate": 5e-05, + "loss": 0.0467, + "num_input_tokens_seen": 919145604, + "step": 9480 + }, + { + "epoch": 0.926867422761048, + "loss": 0.05793169513344765, + "loss_ce": 0.0022294882219284773, + "loss_iou": 0.26171875, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 919145604, + "step": 9480 + }, + { + "epoch": 0.9269651935862339, + "grad_norm": 7.080533680515356, + "learning_rate": 5e-05, + "loss": 0.0612, + "num_input_tokens_seen": 919242120, + "step": 9481 + }, + { + "epoch": 0.9269651935862339, + "loss": 0.03418959304690361, + "loss_ce": 0.0015586747322231531, + "loss_iou": 0.216796875, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 919242120, + "step": 9481 + }, + { + "epoch": 0.9270629644114197, + "grad_norm": 5.42185871894906, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 919338516, + "step": 9482 + }, + { + "epoch": 0.9270629644114197, + "loss": 0.07810133695602417, + "loss_ce": 0.007682028226554394, + "loss_iou": 0.27734375, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 919338516, + "step": 9482 + }, + { + "epoch": 0.9271607352366054, + "grad_norm": 4.023771123134558, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 919434980, + "step": 9483 + }, + { + "epoch": 0.9271607352366054, + "loss": 0.09525664150714874, + "loss_ce": 0.006450491491705179, + "loss_iou": 0.2177734375, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 919434980, + "step": 9483 + }, + { + "epoch": 0.9272585060617912, + "grad_norm": 29.81271458796825, + "learning_rate": 5e-05, + "loss": 0.0908, + "num_input_tokens_seen": 919531888, + "step": 9484 + }, + { + "epoch": 0.9272585060617912, + "loss": 0.06655412912368774, + "loss_ce": 0.0016127170529216528, + "loss_iou": 0.3046875, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 919531888, + "step": 9484 + }, + { + "epoch": 0.927356276886977, + "grad_norm": 33.042969923398296, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 919629028, + "step": 9485 + }, + { + "epoch": 0.927356276886977, + "loss": 0.07842594385147095, + "loss_ce": 0.0022388107609003782, + "loss_iou": 0.322265625, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 919629028, + "step": 9485 + }, + { + "epoch": 0.9274540477121627, + "grad_norm": 18.945981824556647, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 919726884, + "step": 9486 + }, + { + "epoch": 0.9274540477121627, + "loss": 0.058325693011283875, + "loss_ce": 0.0042332848533988, + "loss_iou": 0.337890625, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 919726884, + "step": 9486 + }, + { + "epoch": 0.9275518185373485, + "grad_norm": 21.187211836608945, + "learning_rate": 5e-05, + "loss": 0.075, + "num_input_tokens_seen": 919824244, + "step": 9487 + }, + { + "epoch": 0.9275518185373485, + "loss": 0.0767260193824768, + "loss_ce": 0.007390078157186508, + "loss_iou": 0.2451171875, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 919824244, + "step": 9487 + }, + { + "epoch": 0.9276495893625342, + "grad_norm": 9.494963975417052, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 919920152, + "step": 9488 + }, + { + "epoch": 0.9276495893625342, + "loss": 0.07429283857345581, + "loss_ce": 0.006858999840915203, + "loss_iou": 0.2001953125, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 919920152, + "step": 9488 + }, + { + "epoch": 0.92774736018772, + "grad_norm": 11.130343614439846, + "learning_rate": 5e-05, + "loss": 0.0464, + "num_input_tokens_seen": 920016472, + "step": 9489 + }, + { + "epoch": 0.92774736018772, + "loss": 0.06281368434429169, + "loss_ce": 0.00516598392277956, + "loss_iou": 0.32421875, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 920016472, + "step": 9489 + }, + { + "epoch": 0.9278451310129058, + "grad_norm": 10.054013016815551, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 920112652, + "step": 9490 + }, + { + "epoch": 0.9278451310129058, + "loss": 0.06985171139240265, + "loss_ce": 0.006802396848797798, + "loss_iou": 0.2392578125, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 920112652, + "step": 9490 + }, + { + "epoch": 0.9279429018380915, + "grad_norm": 4.348437475478269, + "learning_rate": 5e-05, + "loss": 0.0535, + "num_input_tokens_seen": 920208816, + "step": 9491 + }, + { + "epoch": 0.9279429018380915, + "loss": 0.04485097900032997, + "loss_ce": 0.010385190136730671, + "loss_iou": 0.1337890625, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 920208816, + "step": 9491 + }, + { + "epoch": 0.9280406726632773, + "grad_norm": 5.8157420223782585, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 920305220, + "step": 9492 + }, + { + "epoch": 0.9280406726632773, + "loss": 0.09064437448978424, + "loss_ce": 0.004119404591619968, + "loss_iou": 0.359375, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 920305220, + "step": 9492 + }, + { + "epoch": 0.9281384434884631, + "grad_norm": 5.228764787303912, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 920402444, + "step": 9493 + }, + { + "epoch": 0.9281384434884631, + "loss": 0.04269600659608841, + "loss_ce": 0.0010547733400017023, + "loss_iou": 0.23046875, + "loss_num": 0.00830078125, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 920402444, + "step": 9493 + }, + { + "epoch": 0.9282362143136488, + "grad_norm": 9.00630391633134, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 920499544, + "step": 9494 + }, + { + "epoch": 0.9282362143136488, + "loss": 0.06554383039474487, + "loss_ce": 0.004462898708879948, + "loss_iou": 0.279296875, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 920499544, + "step": 9494 + }, + { + "epoch": 0.9283339851388346, + "grad_norm": 5.237255012984356, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 920596464, + "step": 9495 + }, + { + "epoch": 0.9283339851388346, + "loss": 0.06308849155902863, + "loss_ce": 0.009888721629977226, + "loss_iou": 0.228515625, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 920596464, + "step": 9495 + }, + { + "epoch": 0.9284317559640204, + "grad_norm": 10.74151881891485, + "learning_rate": 5e-05, + "loss": 0.0501, + "num_input_tokens_seen": 920693508, + "step": 9496 + }, + { + "epoch": 0.9284317559640204, + "loss": 0.06045359745621681, + "loss_ce": 0.003881639102473855, + "loss_iou": 0.263671875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 920693508, + "step": 9496 + }, + { + "epoch": 0.9285295267892061, + "grad_norm": 16.449500844011606, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 920790744, + "step": 9497 + }, + { + "epoch": 0.9285295267892061, + "loss": 0.08094136416912079, + "loss_ce": 0.0061427755281329155, + "loss_iou": 0.29296875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 920790744, + "step": 9497 + }, + { + "epoch": 0.9286272976143919, + "grad_norm": 16.609401175837952, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 920888452, + "step": 9498 + }, + { + "epoch": 0.9286272976143919, + "loss": 0.09617769718170166, + "loss_ce": 0.011010768823325634, + "loss_iou": 0.296875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 920888452, + "step": 9498 + }, + { + "epoch": 0.9287250684395776, + "grad_norm": 23.30463695896934, + "learning_rate": 5e-05, + "loss": 0.0589, + "num_input_tokens_seen": 920986472, + "step": 9499 + }, + { + "epoch": 0.9287250684395776, + "loss": 0.06565000116825104, + "loss_ce": 0.003165255766361952, + "loss_iou": 0.302734375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 920986472, + "step": 9499 + }, + { + "epoch": 0.9288228392647634, + "grad_norm": 21.30890780521379, + "learning_rate": 5e-05, + "loss": 0.0638, + "num_input_tokens_seen": 921083012, + "step": 9500 + }, + { + "epoch": 0.9288228392647634, + "eval_seeclick_CIoU": 0.5931436121463776, + "eval_seeclick_GIoU": 0.5805618464946747, + "eval_seeclick_IoU": 0.6248980462551117, + "eval_seeclick_MAE_all": 0.05871574394404888, + "eval_seeclick_MAE_h": 0.027754352428019047, + "eval_seeclick_MAE_w": 0.08621509373188019, + "eval_seeclick_MAE_x": 0.09341662004590034, + "eval_seeclick_MAE_y": 0.02747691608965397, + "eval_seeclick_NUM_probability": 0.9999984800815582, + "eval_seeclick_inside_bbox": 0.8607954680919647, + "eval_seeclick_loss": 0.21443556249141693, + "eval_seeclick_loss_ce": 0.010352815501391888, + "eval_seeclick_loss_iou": 0.43023681640625, + "eval_seeclick_loss_num": 0.040302276611328125, + "eval_seeclick_loss_xval": 0.201446533203125, + "eval_seeclick_runtime": 76.5675, + "eval_seeclick_samples_per_second": 0.562, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 921083012, + "step": 9500 + }, + { + "epoch": 0.9288228392647634, + "eval_icons_CIoU": 0.7039254605770111, + "eval_icons_GIoU": 0.7076777517795563, + "eval_icons_IoU": 0.7286001741886139, + "eval_icons_MAE_all": 0.04756401851773262, + "eval_icons_MAE_h": 0.05484945327043533, + "eval_icons_MAE_w": 0.041332341730594635, + "eval_icons_MAE_x": 0.0399443618953228, + "eval_icons_MAE_y": 0.05412992462515831, + "eval_icons_NUM_probability": 0.9999988973140717, + "eval_icons_inside_bbox": 0.8229166567325592, + "eval_icons_loss": 0.14914292097091675, + "eval_icons_loss_ce": 2.008123203722789e-06, + "eval_icons_loss_iou": 0.38922119140625, + "eval_icons_loss_num": 0.03375434875488281, + "eval_icons_loss_xval": 0.168792724609375, + "eval_icons_runtime": 85.7946, + "eval_icons_samples_per_second": 0.583, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 921083012, + "step": 9500 + }, + { + "epoch": 0.9288228392647634, + "eval_screenspot_CIoU": 0.2951404998699824, + "eval_screenspot_GIoU": 0.2643621265888214, + "eval_screenspot_IoU": 0.39418693880240124, + "eval_screenspot_MAE_all": 0.17380793392658234, + "eval_screenspot_MAE_h": 0.12595058977603912, + "eval_screenspot_MAE_w": 0.2187610243757566, + "eval_screenspot_MAE_x": 0.22641212989886603, + "eval_screenspot_MAE_y": 0.12410798172156016, + "eval_screenspot_NUM_probability": 0.9999990463256836, + "eval_screenspot_inside_bbox": 0.6195833285649618, + "eval_screenspot_loss": 0.5942825675010681, + "eval_screenspot_loss_ce": 0.02062149407962958, + "eval_screenspot_loss_iou": 0.30859375, + "eval_screenspot_loss_num": 0.1174774169921875, + "eval_screenspot_loss_xval": 0.5875244140625, + "eval_screenspot_runtime": 151.09, + "eval_screenspot_samples_per_second": 0.589, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 921083012, + "step": 9500 + }, + { + "epoch": 0.9288228392647634, + "eval_compot_CIoU": 0.47634918987751007, + "eval_compot_GIoU": 0.45426520705223083, + "eval_compot_IoU": 0.536201000213623, + "eval_compot_MAE_all": 0.09249140694737434, + "eval_compot_MAE_h": 0.0838758796453476, + "eval_compot_MAE_w": 0.10439655929803848, + "eval_compot_MAE_x": 0.09639960527420044, + "eval_compot_MAE_y": 0.08529357612133026, + "eval_compot_NUM_probability": 0.9999961853027344, + "eval_compot_inside_bbox": 0.7083333432674408, + "eval_compot_loss": 0.2988828718662262, + "eval_compot_loss_ce": 0.024014342576265335, + "eval_compot_loss_iou": 0.46337890625, + "eval_compot_loss_num": 0.049030303955078125, + "eval_compot_loss_xval": 0.2451171875, + "eval_compot_runtime": 98.7356, + "eval_compot_samples_per_second": 0.506, + "eval_compot_steps_per_second": 0.02, + "num_input_tokens_seen": 921083012, + "step": 9500 + }, + { + "epoch": 0.9288228392647634, + "loss": 0.24263393878936768, + "loss_ce": 0.02043544501066208, + "loss_iou": 0.498046875, + "loss_num": 0.04443359375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 921083012, + "step": 9500 + }, + { + "epoch": 0.9289206100899492, + "grad_norm": 13.22392942334276, + "learning_rate": 5e-05, + "loss": 0.0476, + "num_input_tokens_seen": 921179660, + "step": 9501 + }, + { + "epoch": 0.9289206100899492, + "loss": 0.044215861707925797, + "loss_ce": 0.004630749579519033, + "loss_iou": 0.26171875, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 921179660, + "step": 9501 + }, + { + "epoch": 0.9290183809151349, + "grad_norm": 9.945313155709671, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 921276992, + "step": 9502 + }, + { + "epoch": 0.9290183809151349, + "loss": 0.08456303924322128, + "loss_ce": 0.010508321225643158, + "loss_iou": 0.287109375, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 921276992, + "step": 9502 + }, + { + "epoch": 0.9291161517403207, + "grad_norm": 5.769091099965099, + "learning_rate": 5e-05, + "loss": 0.0521, + "num_input_tokens_seen": 921373112, + "step": 9503 + }, + { + "epoch": 0.9291161517403207, + "loss": 0.032296136021614075, + "loss_ce": 0.002785639138892293, + "loss_iou": 0.2255859375, + "loss_num": 0.00592041015625, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 921373112, + "step": 9503 + }, + { + "epoch": 0.9292139225655065, + "grad_norm": 4.8528145283588, + "learning_rate": 5e-05, + "loss": 0.0463, + "num_input_tokens_seen": 921470296, + "step": 9504 + }, + { + "epoch": 0.9292139225655065, + "loss": 0.057324327528476715, + "loss_ce": 0.0038270126096904278, + "loss_iou": 0.3125, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 921470296, + "step": 9504 + }, + { + "epoch": 0.9293116933906922, + "grad_norm": 5.497224755822646, + "learning_rate": 5e-05, + "loss": 0.0743, + "num_input_tokens_seen": 921567496, + "step": 9505 + }, + { + "epoch": 0.9293116933906922, + "loss": 0.07235492765903473, + "loss_ce": 0.008100161328911781, + "loss_iou": 0.26953125, + "loss_num": 0.0128173828125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 921567496, + "step": 9505 + }, + { + "epoch": 0.929409464215878, + "grad_norm": 6.348609246663164, + "learning_rate": 5e-05, + "loss": 0.0584, + "num_input_tokens_seen": 921664352, + "step": 9506 + }, + { + "epoch": 0.929409464215878, + "loss": 0.05700073763728142, + "loss_ce": 0.006188966799527407, + "loss_iou": 0.33984375, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 921664352, + "step": 9506 + }, + { + "epoch": 0.9295072350410637, + "grad_norm": 3.332372306575864, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 921761324, + "step": 9507 + }, + { + "epoch": 0.9295072350410637, + "loss": 0.05993020907044411, + "loss_ce": 0.004479771479964256, + "loss_iou": 0.30078125, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 921761324, + "step": 9507 + }, + { + "epoch": 0.9296050058662495, + "grad_norm": 6.531820394720456, + "learning_rate": 5e-05, + "loss": 0.0787, + "num_input_tokens_seen": 921858104, + "step": 9508 + }, + { + "epoch": 0.9296050058662495, + "loss": 0.09810125082731247, + "loss_ce": 0.005785578396171331, + "loss_iou": 0.244140625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 921858104, + "step": 9508 + }, + { + "epoch": 0.9297027766914353, + "grad_norm": 6.249071128908712, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 921954764, + "step": 9509 + }, + { + "epoch": 0.9297027766914353, + "loss": 0.08114787191152573, + "loss_ce": 0.005098062567412853, + "loss_iou": 0.2001953125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 921954764, + "step": 9509 + }, + { + "epoch": 0.929800547516621, + "grad_norm": 16.772057962647676, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 922052680, + "step": 9510 + }, + { + "epoch": 0.929800547516621, + "loss": 0.07021863758563995, + "loss_ce": 0.0067725894041359425, + "loss_iou": 0.283203125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 922052680, + "step": 9510 + }, + { + "epoch": 0.9298983183418068, + "grad_norm": 10.851349575048102, + "learning_rate": 5e-05, + "loss": 0.0944, + "num_input_tokens_seen": 922149416, + "step": 9511 + }, + { + "epoch": 0.9298983183418068, + "loss": 0.08025933802127838, + "loss_ce": 0.0033550341613590717, + "loss_iou": 0.3359375, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 922149416, + "step": 9511 + }, + { + "epoch": 0.9299960891669926, + "grad_norm": 63.57219472813729, + "learning_rate": 5e-05, + "loss": 0.0946, + "num_input_tokens_seen": 922246840, + "step": 9512 + }, + { + "epoch": 0.9299960891669926, + "loss": 0.09712547063827515, + "loss_ce": 0.005801614839583635, + "loss_iou": 0.248046875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 922246840, + "step": 9512 + }, + { + "epoch": 0.9300938599921783, + "grad_norm": 27.311737615473938, + "learning_rate": 5e-05, + "loss": 0.0932, + "num_input_tokens_seen": 922344544, + "step": 9513 + }, + { + "epoch": 0.9300938599921783, + "loss": 0.11723560094833374, + "loss_ce": 0.0036186533980071545, + "loss_iou": 0.31640625, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 922344544, + "step": 9513 + }, + { + "epoch": 0.9301916308173641, + "grad_norm": 17.25859508336038, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 922441320, + "step": 9514 + }, + { + "epoch": 0.9301916308173641, + "loss": 0.08984603732824326, + "loss_ce": 0.007547761779278517, + "loss_iou": 0.263671875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 922441320, + "step": 9514 + }, + { + "epoch": 0.9302894016425499, + "grad_norm": 6.556391569145861, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 922537816, + "step": 9515 + }, + { + "epoch": 0.9302894016425499, + "loss": 0.05473874509334564, + "loss_ce": 0.008451208472251892, + "loss_iou": 0.193359375, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 922537816, + "step": 9515 + }, + { + "epoch": 0.9303871724677356, + "grad_norm": 3.0423646547630883, + "learning_rate": 5e-05, + "loss": 0.0488, + "num_input_tokens_seen": 922634952, + "step": 9516 + }, + { + "epoch": 0.9303871724677356, + "loss": 0.034428466111421585, + "loss_ce": 0.004078734666109085, + "loss_iou": 0.318359375, + "loss_num": 0.006072998046875, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 922634952, + "step": 9516 + }, + { + "epoch": 0.9304849432929214, + "grad_norm": 14.9946778695034, + "learning_rate": 5e-05, + "loss": 0.09, + "num_input_tokens_seen": 922732488, + "step": 9517 + }, + { + "epoch": 0.9304849432929214, + "loss": 0.0974646508693695, + "loss_ce": 0.0049353488720953465, + "loss_iou": 0.396484375, + "loss_num": 0.0184326171875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 922732488, + "step": 9517 + }, + { + "epoch": 0.9305827141181071, + "grad_norm": 19.913733771056137, + "learning_rate": 5e-05, + "loss": 0.107, + "num_input_tokens_seen": 922829764, + "step": 9518 + }, + { + "epoch": 0.9305827141181071, + "loss": 0.10487393289804459, + "loss_ce": 0.007210048846900463, + "loss_iou": 0.2451171875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 922829764, + "step": 9518 + }, + { + "epoch": 0.9306804849432929, + "grad_norm": 10.486764706289437, + "learning_rate": 5e-05, + "loss": 0.063, + "num_input_tokens_seen": 922927264, + "step": 9519 + }, + { + "epoch": 0.9306804849432929, + "loss": 0.0855291560292244, + "loss_ce": 0.004215065389871597, + "loss_iou": 0.29296875, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 922927264, + "step": 9519 + }, + { + "epoch": 0.9307782557684787, + "grad_norm": 13.580835322865536, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 923024828, + "step": 9520 + }, + { + "epoch": 0.9307782557684787, + "loss": 0.06897787004709244, + "loss_ce": 0.007942711003124714, + "loss_iou": 0.3359375, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 923024828, + "step": 9520 + }, + { + "epoch": 0.9308760265936644, + "grad_norm": 15.422572803669622, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 923122156, + "step": 9521 + }, + { + "epoch": 0.9308760265936644, + "loss": 0.06903182715177536, + "loss_ce": 0.005089873448014259, + "loss_iou": 0.330078125, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 923122156, + "step": 9521 + }, + { + "epoch": 0.9309737974188502, + "grad_norm": 13.073235542626229, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 923219124, + "step": 9522 + }, + { + "epoch": 0.9309737974188502, + "loss": 0.08164643496274948, + "loss_ce": 0.005398271605372429, + "loss_iou": 0.369140625, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 923219124, + "step": 9522 + }, + { + "epoch": 0.931071568244036, + "grad_norm": 7.278129704504144, + "learning_rate": 5e-05, + "loss": 0.0405, + "num_input_tokens_seen": 923315128, + "step": 9523 + }, + { + "epoch": 0.931071568244036, + "loss": 0.04932285100221634, + "loss_ce": 0.0034396694973111153, + "loss_iou": 0.271484375, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 923315128, + "step": 9523 + }, + { + "epoch": 0.9311693390692217, + "grad_norm": 4.068927432349862, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 923411956, + "step": 9524 + }, + { + "epoch": 0.9311693390692217, + "loss": 0.06961975246667862, + "loss_ce": 0.0033050519414246082, + "loss_iou": 0.326171875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 923411956, + "step": 9524 + }, + { + "epoch": 0.9312671098944075, + "grad_norm": 17.40418730133281, + "learning_rate": 5e-05, + "loss": 0.0598, + "num_input_tokens_seen": 923509308, + "step": 9525 + }, + { + "epoch": 0.9312671098944075, + "loss": 0.0668293908238411, + "loss_ce": 0.004832929000258446, + "loss_iou": 0.421875, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 923509308, + "step": 9525 + }, + { + "epoch": 0.9313648807195932, + "grad_norm": 13.000877363335883, + "learning_rate": 5e-05, + "loss": 0.0656, + "num_input_tokens_seen": 923605184, + "step": 9526 + }, + { + "epoch": 0.9313648807195932, + "loss": 0.060278281569480896, + "loss_ce": 0.007009850814938545, + "loss_iou": 0.22265625, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 923605184, + "step": 9526 + }, + { + "epoch": 0.931462651544779, + "grad_norm": 10.045240771972608, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 923702572, + "step": 9527 + }, + { + "epoch": 0.931462651544779, + "loss": 0.07102485001087189, + "loss_ce": 0.004557562060654163, + "loss_iou": 0.390625, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 923702572, + "step": 9527 + }, + { + "epoch": 0.9315604223699648, + "grad_norm": 13.735373866697593, + "learning_rate": 5e-05, + "loss": 0.1157, + "num_input_tokens_seen": 923799460, + "step": 9528 + }, + { + "epoch": 0.9315604223699648, + "loss": 0.11590905487537384, + "loss_ce": 0.010714961215853691, + "loss_iou": 0.326171875, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 923799460, + "step": 9528 + }, + { + "epoch": 0.9316581931951505, + "grad_norm": 8.262329674471264, + "learning_rate": 5e-05, + "loss": 0.0996, + "num_input_tokens_seen": 923895936, + "step": 9529 + }, + { + "epoch": 0.9316581931951505, + "loss": 0.088898666203022, + "loss_ce": 0.011017805896699429, + "loss_iou": 0.337890625, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 923895936, + "step": 9529 + }, + { + "epoch": 0.9317559640203363, + "grad_norm": 5.072972050409571, + "learning_rate": 5e-05, + "loss": 0.09, + "num_input_tokens_seen": 923992756, + "step": 9530 + }, + { + "epoch": 0.9317559640203363, + "loss": 0.10147181153297424, + "loss_ce": 0.006888297852128744, + "loss_iou": 0.244140625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 923992756, + "step": 9530 + }, + { + "epoch": 0.9318537348455221, + "grad_norm": 12.711383220684523, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 924089004, + "step": 9531 + }, + { + "epoch": 0.9318537348455221, + "loss": 0.07298256456851959, + "loss_ce": 0.002929464913904667, + "loss_iou": 0.3125, + "loss_num": 0.0140380859375, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 924089004, + "step": 9531 + }, + { + "epoch": 0.9319515056707078, + "grad_norm": 4.618712458221744, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 924185672, + "step": 9532 + }, + { + "epoch": 0.9319515056707078, + "loss": 0.033115729689598083, + "loss_ce": 0.005446778144687414, + "loss_iou": 0.203125, + "loss_num": 0.005523681640625, + "loss_xval": 0.0277099609375, + "num_input_tokens_seen": 924185672, + "step": 9532 + }, + { + "epoch": 0.9320492764958936, + "grad_norm": 6.149183797119137, + "learning_rate": 5e-05, + "loss": 0.0925, + "num_input_tokens_seen": 924282732, + "step": 9533 + }, + { + "epoch": 0.9320492764958936, + "loss": 0.1247527152299881, + "loss_ce": 0.007565211970359087, + "loss_iou": 0.236328125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 924282732, + "step": 9533 + }, + { + "epoch": 0.9321470473210793, + "grad_norm": 2.7422515102075318, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 924379360, + "step": 9534 + }, + { + "epoch": 0.9321470473210793, + "loss": 0.05960792303085327, + "loss_ce": 0.0048288702964782715, + "loss_iou": 0.251953125, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 924379360, + "step": 9534 + }, + { + "epoch": 0.9322448181462651, + "grad_norm": 8.47863723204313, + "learning_rate": 5e-05, + "loss": 0.0905, + "num_input_tokens_seen": 924476260, + "step": 9535 + }, + { + "epoch": 0.9322448181462651, + "loss": 0.06684919446706772, + "loss_ce": 0.005226571578532457, + "loss_iou": 0.26171875, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 924476260, + "step": 9535 + }, + { + "epoch": 0.932342588971451, + "grad_norm": 5.482315999053171, + "learning_rate": 5e-05, + "loss": 0.0509, + "num_input_tokens_seen": 924573456, + "step": 9536 + }, + { + "epoch": 0.932342588971451, + "loss": 0.04153286665678024, + "loss_ce": 0.0013793621910735965, + "loss_iou": 0.224609375, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 924573456, + "step": 9536 + }, + { + "epoch": 0.9324403597966366, + "grad_norm": 3.924325328616656, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 924670736, + "step": 9537 + }, + { + "epoch": 0.9324403597966366, + "loss": 0.06615738570690155, + "loss_ce": 0.003397987689822912, + "loss_iou": 0.2314453125, + "loss_num": 0.01251220703125, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 924670736, + "step": 9537 + }, + { + "epoch": 0.9325381306218224, + "grad_norm": 12.288388920813986, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 924768220, + "step": 9538 + }, + { + "epoch": 0.9325381306218224, + "loss": 0.0507703572511673, + "loss_ce": 0.007649022154510021, + "loss_iou": 0.328125, + "loss_num": 0.00860595703125, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 924768220, + "step": 9538 + }, + { + "epoch": 0.9326359014470083, + "grad_norm": 7.085975694719112, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 924865008, + "step": 9539 + }, + { + "epoch": 0.9326359014470083, + "loss": 0.049797773361206055, + "loss_ce": 0.00785135943442583, + "loss_iou": 0.31640625, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 924865008, + "step": 9539 + }, + { + "epoch": 0.932733672272194, + "grad_norm": 7.3570188419251705, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 924963168, + "step": 9540 + }, + { + "epoch": 0.932733672272194, + "loss": 0.06795865297317505, + "loss_ce": 0.0070455679669976234, + "loss_iou": 0.349609375, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 924963168, + "step": 9540 + }, + { + "epoch": 0.9328314430973798, + "grad_norm": 6.934200993256229, + "learning_rate": 5e-05, + "loss": 0.0595, + "num_input_tokens_seen": 925060796, + "step": 9541 + }, + { + "epoch": 0.9328314430973798, + "loss": 0.08645627647638321, + "loss_ce": 0.0014266747748479247, + "loss_iou": 0.390625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 925060796, + "step": 9541 + }, + { + "epoch": 0.9329292139225656, + "grad_norm": 13.894123268471118, + "learning_rate": 5e-05, + "loss": 0.0718, + "num_input_tokens_seen": 925157984, + "step": 9542 + }, + { + "epoch": 0.9329292139225656, + "loss": 0.09978575259447098, + "loss_ce": 0.003579087555408478, + "loss_iou": 0.267578125, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 925157984, + "step": 9542 + }, + { + "epoch": 0.9330269847477513, + "grad_norm": 8.337520696932758, + "learning_rate": 5e-05, + "loss": 0.0771, + "num_input_tokens_seen": 925255428, + "step": 9543 + }, + { + "epoch": 0.9330269847477513, + "loss": 0.1056532934308052, + "loss_ce": 0.0023818076588213444, + "loss_iou": 0.330078125, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 925255428, + "step": 9543 + }, + { + "epoch": 0.9331247555729371, + "grad_norm": 5.543903595220342, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 925352292, + "step": 9544 + }, + { + "epoch": 0.9331247555729371, + "loss": 0.07123172283172607, + "loss_ce": 0.004230384714901447, + "loss_iou": 0.2412109375, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 925352292, + "step": 9544 + }, + { + "epoch": 0.9332225263981228, + "grad_norm": 8.86391746481562, + "learning_rate": 5e-05, + "loss": 0.0986, + "num_input_tokens_seen": 925449908, + "step": 9545 + }, + { + "epoch": 0.9332225263981228, + "loss": 0.1340901255607605, + "loss_ce": 0.00741166528314352, + "loss_iou": 0.306640625, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 925449908, + "step": 9545 + }, + { + "epoch": 0.9333202972233086, + "grad_norm": 18.211314301872306, + "learning_rate": 5e-05, + "loss": 0.0969, + "num_input_tokens_seen": 925547592, + "step": 9546 + }, + { + "epoch": 0.9333202972233086, + "loss": 0.06521021574735641, + "loss_ce": 0.0027178425807505846, + "loss_iou": 0.259765625, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 925547592, + "step": 9546 + }, + { + "epoch": 0.9334180680484944, + "grad_norm": 16.924634788494146, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 925644336, + "step": 9547 + }, + { + "epoch": 0.9334180680484944, + "loss": 0.07815594971179962, + "loss_ce": 0.01024671457707882, + "loss_iou": 0.1611328125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 925644336, + "step": 9547 + }, + { + "epoch": 0.9335158388736801, + "grad_norm": 5.410565856960129, + "learning_rate": 5e-05, + "loss": 0.0519, + "num_input_tokens_seen": 925740744, + "step": 9548 + }, + { + "epoch": 0.9335158388736801, + "loss": 0.04420076310634613, + "loss_ce": 0.002887592650949955, + "loss_iou": 0.169921875, + "loss_num": 0.00830078125, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 925740744, + "step": 9548 + }, + { + "epoch": 0.9336136096988659, + "grad_norm": 4.975013014100391, + "learning_rate": 5e-05, + "loss": 0.0985, + "num_input_tokens_seen": 925837912, + "step": 9549 + }, + { + "epoch": 0.9336136096988659, + "loss": 0.08618902415037155, + "loss_ce": 0.0068623945116996765, + "loss_iou": 0.1728515625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 925837912, + "step": 9549 + }, + { + "epoch": 0.9337113805240517, + "grad_norm": 1.7628998486644223, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 925934436, + "step": 9550 + }, + { + "epoch": 0.9337113805240517, + "loss": 0.04423312097787857, + "loss_ce": 0.004300867673009634, + "loss_iou": 0.166015625, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 925934436, + "step": 9550 + }, + { + "epoch": 0.9338091513492374, + "grad_norm": 11.091439604988523, + "learning_rate": 5e-05, + "loss": 0.0573, + "num_input_tokens_seen": 926030680, + "step": 9551 + }, + { + "epoch": 0.9338091513492374, + "loss": 0.04162145406007767, + "loss_ce": 0.0032322474289685488, + "loss_iou": 0.2421875, + "loss_num": 0.0076904296875, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 926030680, + "step": 9551 + }, + { + "epoch": 0.9339069221744232, + "grad_norm": 8.176075904582838, + "learning_rate": 5e-05, + "loss": 0.0915, + "num_input_tokens_seen": 926126592, + "step": 9552 + }, + { + "epoch": 0.9339069221744232, + "loss": 0.07569526135921478, + "loss_ce": 0.0049402592703700066, + "loss_iou": 0.265625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 926126592, + "step": 9552 + }, + { + "epoch": 0.9340046929996089, + "grad_norm": 11.469762781986864, + "learning_rate": 5e-05, + "loss": 0.0647, + "num_input_tokens_seen": 926223224, + "step": 9553 + }, + { + "epoch": 0.9340046929996089, + "loss": 0.05449627712368965, + "loss_ce": 0.007621277589350939, + "loss_iou": 0.2578125, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 926223224, + "step": 9553 + }, + { + "epoch": 0.9341024638247947, + "grad_norm": 15.607004894673448, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 926320744, + "step": 9554 + }, + { + "epoch": 0.9341024638247947, + "loss": 0.10914769023656845, + "loss_ce": 0.008592270314693451, + "loss_iou": 0.298828125, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 926320744, + "step": 9554 + }, + { + "epoch": 0.9342002346499805, + "grad_norm": 3.195852118796053, + "learning_rate": 5e-05, + "loss": 0.1001, + "num_input_tokens_seen": 926418036, + "step": 9555 + }, + { + "epoch": 0.9342002346499805, + "loss": 0.0794694796204567, + "loss_ce": 0.010560788214206696, + "loss_iou": 0.333984375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 926418036, + "step": 9555 + }, + { + "epoch": 0.9342980054751662, + "grad_norm": 17.59893746374127, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 926515996, + "step": 9556 + }, + { + "epoch": 0.9342980054751662, + "loss": 0.049066975712776184, + "loss_ce": 0.004465537145733833, + "loss_iou": 0.306640625, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 926515996, + "step": 9556 + }, + { + "epoch": 0.934395776300352, + "grad_norm": 6.501984029459021, + "learning_rate": 5e-05, + "loss": 0.105, + "num_input_tokens_seen": 926612708, + "step": 9557 + }, + { + "epoch": 0.934395776300352, + "loss": 0.08984942734241486, + "loss_ce": 0.008909177035093307, + "loss_iou": 0.1748046875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 926612708, + "step": 9557 + }, + { + "epoch": 0.9344935471255378, + "grad_norm": 8.383721597911185, + "learning_rate": 5e-05, + "loss": 0.0458, + "num_input_tokens_seen": 926709208, + "step": 9558 + }, + { + "epoch": 0.9344935471255378, + "loss": 0.047425273805856705, + "loss_ce": 0.005097392480820417, + "loss_iou": 0.337890625, + "loss_num": 0.00848388671875, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 926709208, + "step": 9558 + }, + { + "epoch": 0.9345913179507235, + "grad_norm": 12.66496451104857, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 926806188, + "step": 9559 + }, + { + "epoch": 0.9345913179507235, + "loss": 0.10990700870752335, + "loss_ce": 0.0048120953142642975, + "loss_iou": 0.29296875, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 926806188, + "step": 9559 + }, + { + "epoch": 0.9346890887759093, + "grad_norm": 9.35507729694316, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 926902128, + "step": 9560 + }, + { + "epoch": 0.9346890887759093, + "loss": 0.0978965312242508, + "loss_ce": 0.010158490389585495, + "loss_iou": 0.21484375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 926902128, + "step": 9560 + }, + { + "epoch": 0.9347868596010951, + "grad_norm": 5.881014749600073, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 926999516, + "step": 9561 + }, + { + "epoch": 0.9347868596010951, + "loss": 0.05502694845199585, + "loss_ce": 0.010074557736515999, + "loss_iou": 0.2353515625, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 926999516, + "step": 9561 + }, + { + "epoch": 0.9348846304262808, + "grad_norm": 14.192715878502998, + "learning_rate": 5e-05, + "loss": 0.0773, + "num_input_tokens_seen": 927095492, + "step": 9562 + }, + { + "epoch": 0.9348846304262808, + "loss": 0.06641444563865662, + "loss_ce": 0.003006541635841131, + "loss_iou": 0.296875, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 927095492, + "step": 9562 + }, + { + "epoch": 0.9349824012514666, + "grad_norm": 16.22769701938681, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 927192396, + "step": 9563 + }, + { + "epoch": 0.9349824012514666, + "loss": 0.04198620468378067, + "loss_ce": 0.00483105331659317, + "loss_iou": 0.185546875, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 927192396, + "step": 9563 + }, + { + "epoch": 0.9350801720766523, + "grad_norm": 3.072492813987468, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 927289360, + "step": 9564 + }, + { + "epoch": 0.9350801720766523, + "loss": 0.04358077421784401, + "loss_ce": 0.0040528797544538975, + "loss_iou": 0.283203125, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 927289360, + "step": 9564 + }, + { + "epoch": 0.9351779429018381, + "grad_norm": 4.04432484573749, + "learning_rate": 5e-05, + "loss": 0.0444, + "num_input_tokens_seen": 927385200, + "step": 9565 + }, + { + "epoch": 0.9351779429018381, + "loss": 0.04878109693527222, + "loss_ce": 0.005869565065950155, + "loss_iou": 0.328125, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 927385200, + "step": 9565 + }, + { + "epoch": 0.9352757137270239, + "grad_norm": 7.945888178306817, + "learning_rate": 5e-05, + "loss": 0.1066, + "num_input_tokens_seen": 927481852, + "step": 9566 + }, + { + "epoch": 0.9352757137270239, + "loss": 0.11470390856266022, + "loss_ce": 0.006541985087096691, + "loss_iou": 0.162109375, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 927481852, + "step": 9566 + }, + { + "epoch": 0.9353734845522096, + "grad_norm": 4.900129692690101, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 927579160, + "step": 9567 + }, + { + "epoch": 0.9353734845522096, + "loss": 0.06884640455245972, + "loss_ce": 0.00401181448251009, + "loss_iou": 0.25, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 927579160, + "step": 9567 + }, + { + "epoch": 0.9354712553773954, + "grad_norm": 1.6261434525463536, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 927676308, + "step": 9568 + }, + { + "epoch": 0.9354712553773954, + "loss": 0.07482808083295822, + "loss_ce": 0.003661090973764658, + "loss_iou": 0.2265625, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 927676308, + "step": 9568 + }, + { + "epoch": 0.9355690262025812, + "grad_norm": 4.3837895526526935, + "learning_rate": 5e-05, + "loss": 0.0274, + "num_input_tokens_seen": 927773312, + "step": 9569 + }, + { + "epoch": 0.9355690262025812, + "loss": 0.02429986372590065, + "loss_ce": 0.0028688940219581127, + "loss_iou": 0.26953125, + "loss_num": 0.0042724609375, + "loss_xval": 0.021484375, + "num_input_tokens_seen": 927773312, + "step": 9569 + }, + { + "epoch": 0.9356667970277669, + "grad_norm": 13.681863795338831, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 927869656, + "step": 9570 + }, + { + "epoch": 0.9356667970277669, + "loss": 0.06439997255802155, + "loss_ce": 0.003151192329823971, + "loss_iou": 0.2158203125, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 927869656, + "step": 9570 + }, + { + "epoch": 0.9357645678529527, + "grad_norm": 13.582957964613987, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 927965924, + "step": 9571 + }, + { + "epoch": 0.9357645678529527, + "loss": 0.09477236866950989, + "loss_ce": 0.003459956729784608, + "loss_iou": 0.2392578125, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 927965924, + "step": 9571 + }, + { + "epoch": 0.9358623386781384, + "grad_norm": 9.150795645927895, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 928062820, + "step": 9572 + }, + { + "epoch": 0.9358623386781384, + "loss": 0.07285083830356598, + "loss_ce": 0.005925788078457117, + "loss_iou": 0.33203125, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 928062820, + "step": 9572 + }, + { + "epoch": 0.9359601095033242, + "grad_norm": 8.46951889283545, + "learning_rate": 5e-05, + "loss": 0.0594, + "num_input_tokens_seen": 928159892, + "step": 9573 + }, + { + "epoch": 0.9359601095033242, + "loss": 0.03617168962955475, + "loss_ce": 0.0014503144193440676, + "loss_iou": 0.28125, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 928159892, + "step": 9573 + }, + { + "epoch": 0.93605788032851, + "grad_norm": 49.019732273844625, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 928257108, + "step": 9574 + }, + { + "epoch": 0.93605788032851, + "loss": 0.10401998460292816, + "loss_ce": 0.006119590252637863, + "loss_iou": 0.259765625, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 928257108, + "step": 9574 + }, + { + "epoch": 0.9361556511536957, + "grad_norm": 15.429978248509753, + "learning_rate": 5e-05, + "loss": 0.0806, + "num_input_tokens_seen": 928353932, + "step": 9575 + }, + { + "epoch": 0.9361556511536957, + "loss": 0.06916043907403946, + "loss_ce": 0.0035857942420989275, + "loss_iou": 0.220703125, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 928353932, + "step": 9575 + }, + { + "epoch": 0.9362534219788815, + "grad_norm": 10.391685607972969, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 928450688, + "step": 9576 + }, + { + "epoch": 0.9362534219788815, + "loss": 0.06535157561302185, + "loss_ce": 0.002851574681699276, + "loss_iou": 0.326171875, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 928450688, + "step": 9576 + }, + { + "epoch": 0.9363511928040673, + "grad_norm": 23.308091969521765, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 928547484, + "step": 9577 + }, + { + "epoch": 0.9363511928040673, + "loss": 0.0705101415514946, + "loss_ce": 0.004706611856818199, + "loss_iou": 0.2138671875, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 928547484, + "step": 9577 + }, + { + "epoch": 0.936448963629253, + "grad_norm": 17.621749038755677, + "learning_rate": 5e-05, + "loss": 0.0465, + "num_input_tokens_seen": 928644384, + "step": 9578 + }, + { + "epoch": 0.936448963629253, + "loss": 0.03515266627073288, + "loss_ce": 0.005741349887102842, + "loss_iou": 0.2578125, + "loss_num": 0.005889892578125, + "loss_xval": 0.0294189453125, + "num_input_tokens_seen": 928644384, + "step": 9578 + }, + { + "epoch": 0.9365467344544388, + "grad_norm": 19.574545773449863, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 928740784, + "step": 9579 + }, + { + "epoch": 0.9365467344544388, + "loss": 0.11851407587528229, + "loss_ce": 0.015677468851208687, + "loss_iou": 0.201171875, + "loss_num": 0.0205078125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 928740784, + "step": 9579 + }, + { + "epoch": 0.9366445052796245, + "grad_norm": 3.699327282487512, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 928838144, + "step": 9580 + }, + { + "epoch": 0.9366445052796245, + "loss": 0.05481712520122528, + "loss_ce": 0.01523582823574543, + "loss_iou": 0.27734375, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 928838144, + "step": 9580 + }, + { + "epoch": 0.9367422761048103, + "grad_norm": 12.741657957649206, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 928934692, + "step": 9581 + }, + { + "epoch": 0.9367422761048103, + "loss": 0.07286767661571503, + "loss_ce": 0.0087807672098279, + "loss_iou": 0.25390625, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 928934692, + "step": 9581 + }, + { + "epoch": 0.9368400469299961, + "grad_norm": 8.867131748740446, + "learning_rate": 5e-05, + "loss": 0.0821, + "num_input_tokens_seen": 929030432, + "step": 9582 + }, + { + "epoch": 0.9368400469299961, + "loss": 0.09458530694246292, + "loss_ce": 0.003993877209722996, + "loss_iou": 0.3515625, + "loss_num": 0.01806640625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 929030432, + "step": 9582 + }, + { + "epoch": 0.9369378177551818, + "grad_norm": 12.7206966321343, + "learning_rate": 5e-05, + "loss": 0.1106, + "num_input_tokens_seen": 929126536, + "step": 9583 + }, + { + "epoch": 0.9369378177551818, + "loss": 0.07148049771785736, + "loss_ce": 0.011467673815786839, + "loss_iou": 0.236328125, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 929126536, + "step": 9583 + }, + { + "epoch": 0.9370355885803676, + "grad_norm": 5.080425911889996, + "learning_rate": 5e-05, + "loss": 0.0889, + "num_input_tokens_seen": 929224240, + "step": 9584 + }, + { + "epoch": 0.9370355885803676, + "loss": 0.07464303821325302, + "loss_ce": 0.005543611012399197, + "loss_iou": 0.3046875, + "loss_num": 0.0137939453125, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 929224240, + "step": 9584 + }, + { + "epoch": 0.9371333594055534, + "grad_norm": 16.70662118557229, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 929321836, + "step": 9585 + }, + { + "epoch": 0.9371333594055534, + "loss": 0.03577961027622223, + "loss_ce": 0.0030113612301647663, + "loss_iou": 0.34375, + "loss_num": 0.006561279296875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 929321836, + "step": 9585 + }, + { + "epoch": 0.9372311302307391, + "grad_norm": 25.95464823584264, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 929419324, + "step": 9586 + }, + { + "epoch": 0.9372311302307391, + "loss": 0.10195292532444, + "loss_ce": 0.0044797868467867374, + "loss_iou": 0.345703125, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 929419324, + "step": 9586 + }, + { + "epoch": 0.9373289010559249, + "grad_norm": 23.86460494720525, + "learning_rate": 5e-05, + "loss": 0.0922, + "num_input_tokens_seen": 929516296, + "step": 9587 + }, + { + "epoch": 0.9373289010559249, + "loss": 0.11354020982980728, + "loss_ce": 0.0055079832673072815, + "loss_iou": 0.341796875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 929516296, + "step": 9587 + }, + { + "epoch": 0.9374266718811107, + "grad_norm": 12.610841402620965, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 929612828, + "step": 9588 + }, + { + "epoch": 0.9374266718811107, + "loss": 0.07576924562454224, + "loss_ce": 0.0057161482982337475, + "loss_iou": 0.26171875, + "loss_num": 0.0140380859375, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 929612828, + "step": 9588 + }, + { + "epoch": 0.9375244427062964, + "grad_norm": 6.868323303096535, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 929709928, + "step": 9589 + }, + { + "epoch": 0.9375244427062964, + "loss": 0.07451119273900986, + "loss_ce": 0.0057322001084685326, + "loss_iou": 0.38671875, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 929709928, + "step": 9589 + }, + { + "epoch": 0.9376222135314822, + "grad_norm": 6.083823867517896, + "learning_rate": 5e-05, + "loss": 0.0589, + "num_input_tokens_seen": 929807380, + "step": 9590 + }, + { + "epoch": 0.9376222135314822, + "loss": 0.05058611184358597, + "loss_ce": 0.003528004279360175, + "loss_iou": 0.341796875, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 929807380, + "step": 9590 + }, + { + "epoch": 0.9377199843566679, + "grad_norm": 8.441081203183758, + "learning_rate": 5e-05, + "loss": 0.0528, + "num_input_tokens_seen": 929903788, + "step": 9591 + }, + { + "epoch": 0.9377199843566679, + "loss": 0.02898552641272545, + "loss_ce": 0.004064111039042473, + "loss_iou": 0.267578125, + "loss_num": 0.004974365234375, + "loss_xval": 0.02490234375, + "num_input_tokens_seen": 929903788, + "step": 9591 + }, + { + "epoch": 0.9378177551818537, + "grad_norm": 5.294186726776571, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 930000716, + "step": 9592 + }, + { + "epoch": 0.9378177551818537, + "loss": 0.09246155619621277, + "loss_ce": 0.006981821730732918, + "loss_iou": 0.259765625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 930000716, + "step": 9592 + }, + { + "epoch": 0.9379155260070395, + "grad_norm": 4.282709496688894, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 930097712, + "step": 9593 + }, + { + "epoch": 0.9379155260070395, + "loss": 0.07959719002246857, + "loss_ce": 0.00781220942735672, + "loss_iou": 0.271484375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 930097712, + "step": 9593 + }, + { + "epoch": 0.9380132968322252, + "grad_norm": 5.169800715082801, + "learning_rate": 5e-05, + "loss": 0.0572, + "num_input_tokens_seen": 930194632, + "step": 9594 + }, + { + "epoch": 0.9380132968322252, + "loss": 0.07489119470119476, + "loss_ce": 0.010224447585642338, + "loss_iou": 0.263671875, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 930194632, + "step": 9594 + }, + { + "epoch": 0.938111067657411, + "grad_norm": 2.7783124165212425, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 930290856, + "step": 9595 + }, + { + "epoch": 0.938111067657411, + "loss": 0.07039301097393036, + "loss_ce": 0.0033611529506742954, + "loss_iou": 0.21484375, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 930290856, + "step": 9595 + }, + { + "epoch": 0.9382088384825968, + "grad_norm": 7.685311859053031, + "learning_rate": 5e-05, + "loss": 0.0521, + "num_input_tokens_seen": 930388244, + "step": 9596 + }, + { + "epoch": 0.9382088384825968, + "loss": 0.033943548798561096, + "loss_ce": 0.0028957289177924395, + "loss_iou": 0.328125, + "loss_num": 0.0062255859375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 930388244, + "step": 9596 + }, + { + "epoch": 0.9383066093077825, + "grad_norm": 4.6824636756480595, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 930484648, + "step": 9597 + }, + { + "epoch": 0.9383066093077825, + "loss": 0.11294115334749222, + "loss_ce": 0.005397204775363207, + "loss_iou": 0.2470703125, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 930484648, + "step": 9597 + }, + { + "epoch": 0.9384043801329683, + "grad_norm": 6.329439538189904, + "learning_rate": 5e-05, + "loss": 0.0546, + "num_input_tokens_seen": 930581844, + "step": 9598 + }, + { + "epoch": 0.9384043801329683, + "loss": 0.048478953540325165, + "loss_ce": 0.0028399138245731592, + "loss_iou": 0.216796875, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 930581844, + "step": 9598 + }, + { + "epoch": 0.938502150958154, + "grad_norm": 4.608353146096308, + "learning_rate": 5e-05, + "loss": 0.0599, + "num_input_tokens_seen": 930678380, + "step": 9599 + }, + { + "epoch": 0.938502150958154, + "loss": 0.05922946333885193, + "loss_ce": 0.0045419614762067795, + "loss_iou": 0.27734375, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 930678380, + "step": 9599 + }, + { + "epoch": 0.9385999217833398, + "grad_norm": 13.890477578708389, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 930775604, + "step": 9600 + }, + { + "epoch": 0.9385999217833398, + "loss": 0.06336821615695953, + "loss_ce": 0.0060867262072861195, + "loss_iou": 0.333984375, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 930775604, + "step": 9600 + }, + { + "epoch": 0.9386976926085256, + "grad_norm": 19.085302483757612, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 930873864, + "step": 9601 + }, + { + "epoch": 0.9386976926085256, + "loss": 0.07400953769683838, + "loss_ce": 0.002956991083920002, + "loss_iou": 0.376953125, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 930873864, + "step": 9601 + }, + { + "epoch": 0.9387954634337113, + "grad_norm": 13.49516391637418, + "learning_rate": 5e-05, + "loss": 0.0984, + "num_input_tokens_seen": 930971320, + "step": 9602 + }, + { + "epoch": 0.9387954634337113, + "loss": 0.11593412607908249, + "loss_ce": 0.005231614224612713, + "loss_iou": 0.439453125, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 930971320, + "step": 9602 + }, + { + "epoch": 0.9388932342588971, + "grad_norm": 9.502077344799256, + "learning_rate": 5e-05, + "loss": 0.0872, + "num_input_tokens_seen": 931068228, + "step": 9603 + }, + { + "epoch": 0.9388932342588971, + "loss": 0.09024757146835327, + "loss_ce": 0.006141133606433868, + "loss_iou": 0.26171875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 931068228, + "step": 9603 + }, + { + "epoch": 0.938991005084083, + "grad_norm": 4.903251805898718, + "learning_rate": 5e-05, + "loss": 0.1084, + "num_input_tokens_seen": 931165424, + "step": 9604 + }, + { + "epoch": 0.938991005084083, + "loss": 0.09832848608493805, + "loss_ce": 0.005463492125272751, + "loss_iou": 0.2412109375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 931165424, + "step": 9604 + }, + { + "epoch": 0.9390887759092686, + "grad_norm": 11.64693431519383, + "learning_rate": 5e-05, + "loss": 0.1097, + "num_input_tokens_seen": 931262912, + "step": 9605 + }, + { + "epoch": 0.9390887759092686, + "loss": 0.10938720405101776, + "loss_ce": 0.007565302308648825, + "loss_iou": 0.33203125, + "loss_num": 0.020263671875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 931262912, + "step": 9605 + }, + { + "epoch": 0.9391865467344545, + "grad_norm": 8.506074771345647, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 931359424, + "step": 9606 + }, + { + "epoch": 0.9391865467344545, + "loss": 0.047994621098041534, + "loss_ce": 0.0031795576214790344, + "loss_iou": 0.2734375, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 931359424, + "step": 9606 + }, + { + "epoch": 0.9392843175596403, + "grad_norm": 4.7479155406204745, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 931456372, + "step": 9607 + }, + { + "epoch": 0.9392843175596403, + "loss": 0.0367230549454689, + "loss_ce": 0.005556978285312653, + "loss_iou": 0.283203125, + "loss_num": 0.0062255859375, + "loss_xval": 0.0311279296875, + "num_input_tokens_seen": 931456372, + "step": 9607 + }, + { + "epoch": 0.939382088384826, + "grad_norm": 21.38389689057551, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 931553476, + "step": 9608 + }, + { + "epoch": 0.939382088384826, + "loss": 0.08187637478113174, + "loss_ce": 0.003095249179750681, + "loss_iou": 0.369140625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 931553476, + "step": 9608 + }, + { + "epoch": 0.9394798592100118, + "grad_norm": 28.36079222548727, + "learning_rate": 5e-05, + "loss": 0.0688, + "num_input_tokens_seen": 931650244, + "step": 9609 + }, + { + "epoch": 0.9394798592100118, + "loss": 0.06359048187732697, + "loss_ce": 0.0069040777161717415, + "loss_iou": 0.162109375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 931650244, + "step": 9609 + }, + { + "epoch": 0.9395776300351975, + "grad_norm": 15.256544159274394, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 931747988, + "step": 9610 + }, + { + "epoch": 0.9395776300351975, + "loss": 0.09304395318031311, + "loss_ce": 0.003787664696574211, + "loss_iou": 0.333984375, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 931747988, + "step": 9610 + }, + { + "epoch": 0.9396754008603833, + "grad_norm": 8.49238260318036, + "learning_rate": 5e-05, + "loss": 0.0729, + "num_input_tokens_seen": 931845144, + "step": 9611 + }, + { + "epoch": 0.9396754008603833, + "loss": 0.06887708604335785, + "loss_ce": 0.008025038987398148, + "loss_iou": 0.267578125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 931845144, + "step": 9611 + }, + { + "epoch": 0.9397731716855691, + "grad_norm": 4.648840106492532, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 931942052, + "step": 9612 + }, + { + "epoch": 0.9397731716855691, + "loss": 0.07277286052703857, + "loss_ce": 0.0070074861869215965, + "loss_iou": 0.279296875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 931942052, + "step": 9612 + }, + { + "epoch": 0.9398709425107548, + "grad_norm": 18.033829701852227, + "learning_rate": 5e-05, + "loss": 0.055, + "num_input_tokens_seen": 932039520, + "step": 9613 + }, + { + "epoch": 0.9398709425107548, + "loss": 0.036075785756111145, + "loss_ce": 0.0035268827341496944, + "loss_iou": 0.265625, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 932039520, + "step": 9613 + }, + { + "epoch": 0.9399687133359406, + "grad_norm": 8.07137492764218, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 932137204, + "step": 9614 + }, + { + "epoch": 0.9399687133359406, + "loss": 0.05667725205421448, + "loss_ce": 0.006231698207557201, + "loss_iou": 0.205078125, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 932137204, + "step": 9614 + }, + { + "epoch": 0.9400664841611264, + "grad_norm": 7.935304299718214, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 932234308, + "step": 9615 + }, + { + "epoch": 0.9400664841611264, + "loss": 0.031158287078142166, + "loss_ce": 0.004302818328142166, + "loss_iou": 0.25390625, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 932234308, + "step": 9615 + }, + { + "epoch": 0.9401642549863121, + "grad_norm": 4.069403618066979, + "learning_rate": 5e-05, + "loss": 0.0899, + "num_input_tokens_seen": 932330036, + "step": 9616 + }, + { + "epoch": 0.9401642549863121, + "loss": 0.05375504493713379, + "loss_ce": 0.004461525473743677, + "loss_iou": 0.1396484375, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 932330036, + "step": 9616 + }, + { + "epoch": 0.9402620258114979, + "grad_norm": 2.7277287445540983, + "learning_rate": 5e-05, + "loss": 0.0995, + "num_input_tokens_seen": 932426004, + "step": 9617 + }, + { + "epoch": 0.9402620258114979, + "loss": 0.10625772923231125, + "loss_ce": 0.005503946915268898, + "loss_iou": 0.279296875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 932426004, + "step": 9617 + }, + { + "epoch": 0.9403597966366836, + "grad_norm": 12.644667855750157, + "learning_rate": 5e-05, + "loss": 0.0843, + "num_input_tokens_seen": 932524608, + "step": 9618 + }, + { + "epoch": 0.9403597966366836, + "loss": 0.06593473255634308, + "loss_ce": 0.003930640406906605, + "loss_iou": 0.28515625, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 932524608, + "step": 9618 + }, + { + "epoch": 0.9404575674618694, + "grad_norm": 15.54567472248083, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 932621564, + "step": 9619 + }, + { + "epoch": 0.9404575674618694, + "loss": 0.06419269740581512, + "loss_ce": 0.00398151483386755, + "loss_iou": 0.287109375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 932621564, + "step": 9619 + }, + { + "epoch": 0.9405553382870552, + "grad_norm": 4.237375909646983, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 932718308, + "step": 9620 + }, + { + "epoch": 0.9405553382870552, + "loss": 0.12260667234659195, + "loss_ce": 0.004335794597864151, + "loss_iou": 0.326171875, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 932718308, + "step": 9620 + }, + { + "epoch": 0.9406531091122409, + "grad_norm": 9.901264937293291, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 932815668, + "step": 9621 + }, + { + "epoch": 0.9406531091122409, + "loss": 0.07048603892326355, + "loss_ce": 0.00628468906506896, + "loss_iou": 0.25, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 932815668, + "step": 9621 + }, + { + "epoch": 0.9407508799374267, + "grad_norm": 10.396895543034804, + "learning_rate": 5e-05, + "loss": 0.0871, + "num_input_tokens_seen": 932911796, + "step": 9622 + }, + { + "epoch": 0.9407508799374267, + "loss": 0.11037364602088928, + "loss_ce": 0.00748362997546792, + "loss_iou": 0.30859375, + "loss_num": 0.0205078125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 932911796, + "step": 9622 + }, + { + "epoch": 0.9408486507626125, + "grad_norm": 6.019749629537791, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 933008848, + "step": 9623 + }, + { + "epoch": 0.9408486507626125, + "loss": 0.07442983984947205, + "loss_ce": 0.0035985445138067007, + "loss_iou": 0.28125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 933008848, + "step": 9623 + }, + { + "epoch": 0.9409464215877982, + "grad_norm": 5.428647862340207, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 933105496, + "step": 9624 + }, + { + "epoch": 0.9409464215877982, + "loss": 0.07090175896883011, + "loss_ce": 0.009347804822027683, + "loss_iou": 0.287109375, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 933105496, + "step": 9624 + }, + { + "epoch": 0.941044192412984, + "grad_norm": 5.912836520687485, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 933201764, + "step": 9625 + }, + { + "epoch": 0.941044192412984, + "loss": 0.058718740940093994, + "loss_ce": 0.003085196018218994, + "loss_iou": 0.2265625, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 933201764, + "step": 9625 + }, + { + "epoch": 0.9411419632381697, + "grad_norm": 4.443177422050029, + "learning_rate": 5e-05, + "loss": 0.0703, + "num_input_tokens_seen": 933297536, + "step": 9626 + }, + { + "epoch": 0.9411419632381697, + "loss": 0.05230490490794182, + "loss_ce": 0.00493399566039443, + "loss_iou": 0.2275390625, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 933297536, + "step": 9626 + }, + { + "epoch": 0.9412397340633555, + "grad_norm": 3.705648584280508, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 933395356, + "step": 9627 + }, + { + "epoch": 0.9412397340633555, + "loss": 0.08338114619255066, + "loss_ce": 0.00672861747443676, + "loss_iou": 0.33203125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 933395356, + "step": 9627 + }, + { + "epoch": 0.9413375048885413, + "grad_norm": 4.100193038813437, + "learning_rate": 5e-05, + "loss": 0.053, + "num_input_tokens_seen": 933492896, + "step": 9628 + }, + { + "epoch": 0.9413375048885413, + "loss": 0.037508562207221985, + "loss_ce": 0.003450946882367134, + "loss_iou": 0.283203125, + "loss_num": 0.006805419921875, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 933492896, + "step": 9628 + }, + { + "epoch": 0.941435275713727, + "grad_norm": 4.0190104138083385, + "learning_rate": 5e-05, + "loss": 0.0495, + "num_input_tokens_seen": 933590068, + "step": 9629 + }, + { + "epoch": 0.941435275713727, + "loss": 0.04256147891283035, + "loss_ce": 0.003102252259850502, + "loss_iou": 0.314453125, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 933590068, + "step": 9629 + }, + { + "epoch": 0.9415330465389128, + "grad_norm": 15.239587478621338, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 933686456, + "step": 9630 + }, + { + "epoch": 0.9415330465389128, + "loss": 0.1250322014093399, + "loss_ce": 0.008912828750908375, + "loss_iou": 0.2890625, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 933686456, + "step": 9630 + }, + { + "epoch": 0.9416308173640986, + "grad_norm": 25.1047767632424, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 933783920, + "step": 9631 + }, + { + "epoch": 0.9416308173640986, + "loss": 0.08331373333930969, + "loss_ce": 0.006684090476483107, + "loss_iou": 0.296875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 933783920, + "step": 9631 + }, + { + "epoch": 0.9417285881892843, + "grad_norm": 7.812450106625101, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 933880372, + "step": 9632 + }, + { + "epoch": 0.9417285881892843, + "loss": 0.06173118203878403, + "loss_ce": 0.0023439782671630383, + "loss_iou": 0.2236328125, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 933880372, + "step": 9632 + }, + { + "epoch": 0.9418263590144701, + "grad_norm": 3.9175001633882336, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 933977524, + "step": 9633 + }, + { + "epoch": 0.9418263590144701, + "loss": 0.06162773817777634, + "loss_ce": 0.004178394563496113, + "loss_iou": 0.2197265625, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 933977524, + "step": 9633 + }, + { + "epoch": 0.9419241298396559, + "grad_norm": 11.210520282968439, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 934074404, + "step": 9634 + }, + { + "epoch": 0.9419241298396559, + "loss": 0.052831005305051804, + "loss_ce": 0.004399608820676804, + "loss_iou": 0.2421875, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 934074404, + "step": 9634 + }, + { + "epoch": 0.9420219006648416, + "grad_norm": 21.927015700611676, + "learning_rate": 5e-05, + "loss": 0.0775, + "num_input_tokens_seen": 934171532, + "step": 9635 + }, + { + "epoch": 0.9420219006648416, + "loss": 0.07249622792005539, + "loss_ce": 0.004838753957301378, + "loss_iou": 0.275390625, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 934171532, + "step": 9635 + }, + { + "epoch": 0.9421196714900274, + "grad_norm": 11.265481977790312, + "learning_rate": 5e-05, + "loss": 0.0505, + "num_input_tokens_seen": 934268924, + "step": 9636 + }, + { + "epoch": 0.9421196714900274, + "loss": 0.058421917259693146, + "loss_ce": 0.003307170933112502, + "loss_iou": 0.37109375, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 934268924, + "step": 9636 + }, + { + "epoch": 0.9422174423152131, + "grad_norm": 11.013592012697533, + "learning_rate": 5e-05, + "loss": 0.077, + "num_input_tokens_seen": 934366080, + "step": 9637 + }, + { + "epoch": 0.9422174423152131, + "loss": 0.08507893234491348, + "loss_ce": 0.008373001590371132, + "loss_iou": 0.28125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 934366080, + "step": 9637 + }, + { + "epoch": 0.9423152131403989, + "grad_norm": 4.535411302468613, + "learning_rate": 5e-05, + "loss": 0.0463, + "num_input_tokens_seen": 934462716, + "step": 9638 + }, + { + "epoch": 0.9423152131403989, + "loss": 0.04921770095825195, + "loss_ce": 0.0029912032186985016, + "loss_iou": 0.33203125, + "loss_num": 0.00927734375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 934462716, + "step": 9638 + }, + { + "epoch": 0.9424129839655847, + "grad_norm": 3.8803600586977254, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 934560020, + "step": 9639 + }, + { + "epoch": 0.9424129839655847, + "loss": 0.08690216392278671, + "loss_ce": 0.00325348530896008, + "loss_iou": 0.19921875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 934560020, + "step": 9639 + }, + { + "epoch": 0.9425107547907704, + "grad_norm": 8.87100074761301, + "learning_rate": 5e-05, + "loss": 0.0964, + "num_input_tokens_seen": 934657240, + "step": 9640 + }, + { + "epoch": 0.9425107547907704, + "loss": 0.08578439056873322, + "loss_ce": 0.009505711495876312, + "loss_iou": 0.265625, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 934657240, + "step": 9640 + }, + { + "epoch": 0.9426085256159562, + "grad_norm": 27.484545545010466, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 934753956, + "step": 9641 + }, + { + "epoch": 0.9426085256159562, + "loss": 0.07845382392406464, + "loss_ce": 0.008599084801971912, + "loss_iou": 0.2314453125, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 934753956, + "step": 9641 + }, + { + "epoch": 0.942706296441142, + "grad_norm": 17.477630642306917, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 934851388, + "step": 9642 + }, + { + "epoch": 0.942706296441142, + "loss": 0.09470289945602417, + "loss_ce": 0.00913161225616932, + "loss_iou": 0.3046875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 934851388, + "step": 9642 + }, + { + "epoch": 0.9428040672663277, + "grad_norm": 10.085199278872764, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 934948128, + "step": 9643 + }, + { + "epoch": 0.9428040672663277, + "loss": 0.05128759145736694, + "loss_ce": 0.0037030568346381187, + "loss_iou": 0.28515625, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 934948128, + "step": 9643 + }, + { + "epoch": 0.9429018380915135, + "grad_norm": 11.510609319434833, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 935045916, + "step": 9644 + }, + { + "epoch": 0.9429018380915135, + "loss": 0.07047682255506516, + "loss_ce": 0.003124528331682086, + "loss_iou": 0.357421875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 935045916, + "step": 9644 + }, + { + "epoch": 0.9429996089166992, + "grad_norm": 6.5553370784494716, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 935143356, + "step": 9645 + }, + { + "epoch": 0.9429996089166992, + "loss": 0.0799194946885109, + "loss_ce": 0.0060326214879751205, + "loss_iou": 0.25390625, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 935143356, + "step": 9645 + }, + { + "epoch": 0.943097379741885, + "grad_norm": 7.081321867957033, + "learning_rate": 5e-05, + "loss": 0.1127, + "num_input_tokens_seen": 935239340, + "step": 9646 + }, + { + "epoch": 0.943097379741885, + "loss": 0.10656242072582245, + "loss_ce": 0.009012987837195396, + "loss_iou": 0.12451171875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 935239340, + "step": 9646 + }, + { + "epoch": 0.9431951505670708, + "grad_norm": 10.505042195376651, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 935336172, + "step": 9647 + }, + { + "epoch": 0.9431951505670708, + "loss": 0.06109922379255295, + "loss_ce": 0.0031577839981764555, + "loss_iou": 0.279296875, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 935336172, + "step": 9647 + }, + { + "epoch": 0.9432929213922565, + "grad_norm": 11.332294336513757, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 935432404, + "step": 9648 + }, + { + "epoch": 0.9432929213922565, + "loss": 0.12102548778057098, + "loss_ce": 0.006843972485512495, + "loss_iou": 0.2001953125, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 935432404, + "step": 9648 + }, + { + "epoch": 0.9433906922174423, + "grad_norm": 3.53206839072752, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 935530528, + "step": 9649 + }, + { + "epoch": 0.9433906922174423, + "loss": 0.06669780611991882, + "loss_ce": 0.0032441362272948027, + "loss_iou": 0.2353515625, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 935530528, + "step": 9649 + }, + { + "epoch": 0.9434884630426281, + "grad_norm": 2.707360134639078, + "learning_rate": 5e-05, + "loss": 0.0589, + "num_input_tokens_seen": 935627224, + "step": 9650 + }, + { + "epoch": 0.9434884630426281, + "loss": 0.05086816847324371, + "loss_ce": 0.0026618388947099447, + "loss_iou": 0.1806640625, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 935627224, + "step": 9650 + }, + { + "epoch": 0.9435862338678138, + "grad_norm": 3.2977096773708583, + "learning_rate": 5e-05, + "loss": 0.1048, + "num_input_tokens_seen": 935723336, + "step": 9651 + }, + { + "epoch": 0.9435862338678138, + "loss": 0.08956755697727203, + "loss_ce": 0.002577204257249832, + "loss_iou": 0.1376953125, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 935723336, + "step": 9651 + }, + { + "epoch": 0.9436840046929996, + "grad_norm": 12.429483268951849, + "learning_rate": 5e-05, + "loss": 0.0899, + "num_input_tokens_seen": 935819476, + "step": 9652 + }, + { + "epoch": 0.9436840046929996, + "loss": 0.07281540334224701, + "loss_ce": 0.0021672090515494347, + "loss_iou": 0.2119140625, + "loss_num": 0.01409912109375, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 935819476, + "step": 9652 + }, + { + "epoch": 0.9437817755181854, + "grad_norm": 8.781526360147257, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 935916200, + "step": 9653 + }, + { + "epoch": 0.9437817755181854, + "loss": 0.059169359505176544, + "loss_ce": 0.0038028410635888577, + "loss_iou": 0.1806640625, + "loss_num": 0.01104736328125, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 935916200, + "step": 9653 + }, + { + "epoch": 0.9438795463433711, + "grad_norm": 5.008048056947347, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 936013156, + "step": 9654 + }, + { + "epoch": 0.9438795463433711, + "loss": 0.06384603679180145, + "loss_ce": 0.007281702011823654, + "loss_iou": 0.255859375, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 936013156, + "step": 9654 + }, + { + "epoch": 0.9439773171685569, + "grad_norm": 8.932702113634406, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 936110320, + "step": 9655 + }, + { + "epoch": 0.9439773171685569, + "loss": 0.0850941464304924, + "loss_ce": 0.005733185447752476, + "loss_iou": 0.25390625, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 936110320, + "step": 9655 + }, + { + "epoch": 0.9440750879937426, + "grad_norm": 9.828828428636195, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 936207952, + "step": 9656 + }, + { + "epoch": 0.9440750879937426, + "loss": 0.06988182663917542, + "loss_ce": 0.006328966468572617, + "loss_iou": 0.259765625, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 936207952, + "step": 9656 + }, + { + "epoch": 0.9441728588189284, + "grad_norm": 5.4435932831549705, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 936304828, + "step": 9657 + }, + { + "epoch": 0.9441728588189284, + "loss": 0.08675993233919144, + "loss_ce": 0.003813152899965644, + "loss_iou": 0.228515625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 936304828, + "step": 9657 + }, + { + "epoch": 0.9442706296441142, + "grad_norm": 4.081600232256501, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 936402660, + "step": 9658 + }, + { + "epoch": 0.9442706296441142, + "loss": 0.09590322524309158, + "loss_ce": 0.005899258889257908, + "loss_iou": 0.263671875, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 936402660, + "step": 9658 + }, + { + "epoch": 0.9443684004692999, + "grad_norm": 5.306980588827087, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 936499960, + "step": 9659 + }, + { + "epoch": 0.9443684004692999, + "loss": 0.06850619614124298, + "loss_ce": 0.002923925407230854, + "loss_iou": 0.1484375, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 936499960, + "step": 9659 + }, + { + "epoch": 0.9444661712944857, + "grad_norm": 12.931316147477965, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 936597560, + "step": 9660 + }, + { + "epoch": 0.9444661712944857, + "loss": 0.0863737165927887, + "loss_ce": 0.00851001963019371, + "loss_iou": 0.28125, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 936597560, + "step": 9660 + }, + { + "epoch": 0.9445639421196715, + "grad_norm": 11.983165760853396, + "learning_rate": 5e-05, + "loss": 0.063, + "num_input_tokens_seen": 936695204, + "step": 9661 + }, + { + "epoch": 0.9445639421196715, + "loss": 0.052117373794317245, + "loss_ce": 0.003350283484905958, + "loss_iou": 0.380859375, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 936695204, + "step": 9661 + }, + { + "epoch": 0.9446617129448572, + "grad_norm": 14.873512545244747, + "learning_rate": 5e-05, + "loss": 0.0561, + "num_input_tokens_seen": 936792092, + "step": 9662 + }, + { + "epoch": 0.9446617129448572, + "loss": 0.07335762679576874, + "loss_ce": 0.008832015097141266, + "loss_iou": 0.203125, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 936792092, + "step": 9662 + }, + { + "epoch": 0.944759483770043, + "grad_norm": 8.972286086618052, + "learning_rate": 5e-05, + "loss": 0.0434, + "num_input_tokens_seen": 936888364, + "step": 9663 + }, + { + "epoch": 0.944759483770043, + "loss": 0.042894452810287476, + "loss_ce": 0.004922953434288502, + "loss_iou": 0.236328125, + "loss_num": 0.007598876953125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 936888364, + "step": 9663 + }, + { + "epoch": 0.9448572545952287, + "grad_norm": 3.4085190466774797, + "learning_rate": 5e-05, + "loss": 0.0553, + "num_input_tokens_seen": 936984960, + "step": 9664 + }, + { + "epoch": 0.9448572545952287, + "loss": 0.05557706579566002, + "loss_ce": 0.008579996414482594, + "loss_iou": 0.30078125, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 936984960, + "step": 9664 + }, + { + "epoch": 0.9449550254204145, + "grad_norm": 6.8841757980179885, + "learning_rate": 5e-05, + "loss": 0.0754, + "num_input_tokens_seen": 937081608, + "step": 9665 + }, + { + "epoch": 0.9449550254204145, + "loss": 0.047858886420726776, + "loss_ce": 0.0047489944845438, + "loss_iou": 0.2578125, + "loss_num": 0.00860595703125, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 937081608, + "step": 9665 + }, + { + "epoch": 0.9450527962456003, + "grad_norm": 12.769572489613733, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 937178732, + "step": 9666 + }, + { + "epoch": 0.9450527962456003, + "loss": 0.06865419447422028, + "loss_ce": 0.004399430938065052, + "loss_iou": 0.2734375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 937178732, + "step": 9666 + }, + { + "epoch": 0.945150567070786, + "grad_norm": 5.811677302786668, + "learning_rate": 5e-05, + "loss": 0.0605, + "num_input_tokens_seen": 937276416, + "step": 9667 + }, + { + "epoch": 0.945150567070786, + "loss": 0.05189715325832367, + "loss_ce": 0.004320252686738968, + "loss_iou": 0.25390625, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 937276416, + "step": 9667 + }, + { + "epoch": 0.9452483378959718, + "grad_norm": 3.0820189281234813, + "learning_rate": 5e-05, + "loss": 0.0983, + "num_input_tokens_seen": 937373460, + "step": 9668 + }, + { + "epoch": 0.9452483378959718, + "loss": 0.11865687370300293, + "loss_ce": 0.0014998915139585733, + "loss_iou": 0.2314453125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 937373460, + "step": 9668 + }, + { + "epoch": 0.9453461087211577, + "grad_norm": 4.610907018825278, + "learning_rate": 5e-05, + "loss": 0.0758, + "num_input_tokens_seen": 937470396, + "step": 9669 + }, + { + "epoch": 0.9453461087211577, + "loss": 0.04148072749376297, + "loss_ce": 0.0020825322717428207, + "loss_iou": 0.357421875, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 937470396, + "step": 9669 + }, + { + "epoch": 0.9454438795463433, + "grad_norm": 4.099235807837352, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 937567432, + "step": 9670 + }, + { + "epoch": 0.9454438795463433, + "loss": 0.06712552160024643, + "loss_ce": 0.003999912180006504, + "loss_iou": 0.1865234375, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 937567432, + "step": 9670 + }, + { + "epoch": 0.9455416503715292, + "grad_norm": 2.3390620692610713, + "learning_rate": 5e-05, + "loss": 0.0552, + "num_input_tokens_seen": 937664020, + "step": 9671 + }, + { + "epoch": 0.9455416503715292, + "loss": 0.0656842291355133, + "loss_ce": 0.008257776498794556, + "loss_iou": 0.15234375, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 937664020, + "step": 9671 + }, + { + "epoch": 0.9456394211967148, + "grad_norm": 3.328621922620124, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 937760712, + "step": 9672 + }, + { + "epoch": 0.9456394211967148, + "loss": 0.07227280735969543, + "loss_ce": 0.003913433291018009, + "loss_iou": 0.2236328125, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 937760712, + "step": 9672 + }, + { + "epoch": 0.9457371920219007, + "grad_norm": 4.13574087171782, + "learning_rate": 5e-05, + "loss": 0.0589, + "num_input_tokens_seen": 937857880, + "step": 9673 + }, + { + "epoch": 0.9457371920219007, + "loss": 0.03286520391702652, + "loss_ce": 0.0031487145461142063, + "loss_iou": 0.1767578125, + "loss_num": 0.005950927734375, + "loss_xval": 0.0296630859375, + "num_input_tokens_seen": 937857880, + "step": 9673 + }, + { + "epoch": 0.9458349628470865, + "grad_norm": 8.291585053769998, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 937954988, + "step": 9674 + }, + { + "epoch": 0.9458349628470865, + "loss": 0.08907492458820343, + "loss_ce": 0.0037172590382397175, + "loss_iou": 0.259765625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 937954988, + "step": 9674 + }, + { + "epoch": 0.9459327336722722, + "grad_norm": 30.583883107275863, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 938051584, + "step": 9675 + }, + { + "epoch": 0.9459327336722722, + "loss": 0.06149168312549591, + "loss_ce": 0.0038516114000231028, + "loss_iou": 0.27734375, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 938051584, + "step": 9675 + }, + { + "epoch": 0.946030504497458, + "grad_norm": 15.905390030836944, + "learning_rate": 5e-05, + "loss": 0.0552, + "num_input_tokens_seen": 938148492, + "step": 9676 + }, + { + "epoch": 0.946030504497458, + "loss": 0.04976149648427963, + "loss_ce": 0.005572041962295771, + "loss_iou": 0.30078125, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 938148492, + "step": 9676 + }, + { + "epoch": 0.9461282753226438, + "grad_norm": 6.9237434399639035, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 938245720, + "step": 9677 + }, + { + "epoch": 0.9461282753226438, + "loss": 0.07897710800170898, + "loss_ce": 0.004071718081831932, + "loss_iou": 0.298828125, + "loss_num": 0.0150146484375, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 938245720, + "step": 9677 + }, + { + "epoch": 0.9462260461478295, + "grad_norm": 7.181324556094628, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 938342176, + "step": 9678 + }, + { + "epoch": 0.9462260461478295, + "loss": 0.05191273242235184, + "loss_ce": 0.004679148551076651, + "loss_iou": 0.21484375, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 938342176, + "step": 9678 + }, + { + "epoch": 0.9463238169730153, + "grad_norm": 20.081610539810395, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 938439852, + "step": 9679 + }, + { + "epoch": 0.9463238169730153, + "loss": 0.12151360511779785, + "loss_ce": 0.003685231553390622, + "loss_iou": 0.330078125, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 938439852, + "step": 9679 + }, + { + "epoch": 0.9464215877982011, + "grad_norm": 6.218044156636848, + "learning_rate": 5e-05, + "loss": 0.0527, + "num_input_tokens_seen": 938536636, + "step": 9680 + }, + { + "epoch": 0.9464215877982011, + "loss": 0.061713941395282745, + "loss_ce": 0.002418283838778734, + "loss_iou": 0.35546875, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 938536636, + "step": 9680 + }, + { + "epoch": 0.9465193586233868, + "grad_norm": 4.509294710259916, + "learning_rate": 5e-05, + "loss": 0.0622, + "num_input_tokens_seen": 938633428, + "step": 9681 + }, + { + "epoch": 0.9465193586233868, + "loss": 0.045702554285526276, + "loss_ce": 0.0040308041498064995, + "loss_iou": 0.28515625, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 938633428, + "step": 9681 + }, + { + "epoch": 0.9466171294485726, + "grad_norm": 9.563987259455683, + "learning_rate": 5e-05, + "loss": 0.0604, + "num_input_tokens_seen": 938728972, + "step": 9682 + }, + { + "epoch": 0.9466171294485726, + "loss": 0.06953069567680359, + "loss_ce": 0.006204811856150627, + "loss_iou": 0.203125, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 938728972, + "step": 9682 + }, + { + "epoch": 0.9467149002737583, + "grad_norm": 6.870773085413348, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 938825836, + "step": 9683 + }, + { + "epoch": 0.9467149002737583, + "loss": 0.06677310168743134, + "loss_ce": 0.0052649229764938354, + "loss_iou": 0.322265625, + "loss_num": 0.01226806640625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 938825836, + "step": 9683 + }, + { + "epoch": 0.9468126710989441, + "grad_norm": 6.51295357429456, + "learning_rate": 5e-05, + "loss": 0.0526, + "num_input_tokens_seen": 938922132, + "step": 9684 + }, + { + "epoch": 0.9468126710989441, + "loss": 0.04806888848543167, + "loss_ce": 0.006973154842853546, + "loss_iou": 0.25, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 938922132, + "step": 9684 + }, + { + "epoch": 0.9469104419241299, + "grad_norm": 15.839663139202063, + "learning_rate": 5e-05, + "loss": 0.126, + "num_input_tokens_seen": 939019200, + "step": 9685 + }, + { + "epoch": 0.9469104419241299, + "loss": 0.15331009030342102, + "loss_ce": 0.022900845855474472, + "loss_iou": 0.240234375, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 939019200, + "step": 9685 + }, + { + "epoch": 0.9470082127493156, + "grad_norm": 16.293057724789506, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 939116428, + "step": 9686 + }, + { + "epoch": 0.9470082127493156, + "loss": 0.06847003102302551, + "loss_ce": 0.0030174541752785444, + "loss_iou": 0.205078125, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 939116428, + "step": 9686 + }, + { + "epoch": 0.9471059835745014, + "grad_norm": 12.735163959513741, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 939213064, + "step": 9687 + }, + { + "epoch": 0.9471059835745014, + "loss": 0.0593128427863121, + "loss_ce": 0.0032978313975036144, + "loss_iou": 0.296875, + "loss_num": 0.01123046875, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 939213064, + "step": 9687 + }, + { + "epoch": 0.9472037543996872, + "grad_norm": 10.164108967554549, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 939309536, + "step": 9688 + }, + { + "epoch": 0.9472037543996872, + "loss": 0.07619412988424301, + "loss_ce": 0.008780798874795437, + "loss_iou": 0.208984375, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 939309536, + "step": 9688 + }, + { + "epoch": 0.9473015252248729, + "grad_norm": 6.095578130787795, + "learning_rate": 5e-05, + "loss": 0.0543, + "num_input_tokens_seen": 939406736, + "step": 9689 + }, + { + "epoch": 0.9473015252248729, + "loss": 0.04895268380641937, + "loss_ce": 0.004915817175060511, + "loss_iou": 0.296875, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 939406736, + "step": 9689 + }, + { + "epoch": 0.9473992960500587, + "grad_norm": 4.426675147544393, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 939504388, + "step": 9690 + }, + { + "epoch": 0.9473992960500587, + "loss": 0.06986910104751587, + "loss_ce": 0.006133144721388817, + "loss_iou": 0.330078125, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 939504388, + "step": 9690 + }, + { + "epoch": 0.9474970668752444, + "grad_norm": 14.091135914806813, + "learning_rate": 5e-05, + "loss": 0.0647, + "num_input_tokens_seen": 939602264, + "step": 9691 + }, + { + "epoch": 0.9474970668752444, + "loss": 0.06659123301506042, + "loss_ce": 0.005998581647872925, + "loss_iou": 0.341796875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 939602264, + "step": 9691 + }, + { + "epoch": 0.9475948377004302, + "grad_norm": 7.197787374382957, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 939699776, + "step": 9692 + }, + { + "epoch": 0.9475948377004302, + "loss": 0.09121736884117126, + "loss_ce": 0.006729455664753914, + "loss_iou": 0.40625, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 939699776, + "step": 9692 + }, + { + "epoch": 0.947692608525616, + "grad_norm": 4.6663928437025834, + "learning_rate": 5e-05, + "loss": 0.067, + "num_input_tokens_seen": 939796788, + "step": 9693 + }, + { + "epoch": 0.947692608525616, + "loss": 0.041549202054739, + "loss_ce": 0.004577153827995062, + "loss_iou": 0.2734375, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 939796788, + "step": 9693 + }, + { + "epoch": 0.9477903793508017, + "grad_norm": 14.442463855957438, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 939894116, + "step": 9694 + }, + { + "epoch": 0.9477903793508017, + "loss": 0.08439506590366364, + "loss_ce": 0.009749065153300762, + "loss_iou": 0.359375, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 939894116, + "step": 9694 + }, + { + "epoch": 0.9478881501759875, + "grad_norm": 14.518924685889827, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 939990080, + "step": 9695 + }, + { + "epoch": 0.9478881501759875, + "loss": 0.07936648279428482, + "loss_ce": 0.007001679856330156, + "loss_iou": 0.30078125, + "loss_num": 0.0145263671875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 939990080, + "step": 9695 + }, + { + "epoch": 0.9479859210011733, + "grad_norm": 20.505111365750878, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 940086360, + "step": 9696 + }, + { + "epoch": 0.9479859210011733, + "loss": 0.08485876023769379, + "loss_ce": 0.008351187221705914, + "loss_iou": 0.251953125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 940086360, + "step": 9696 + }, + { + "epoch": 0.948083691826359, + "grad_norm": 8.667216071267413, + "learning_rate": 5e-05, + "loss": 0.0543, + "num_input_tokens_seen": 940183012, + "step": 9697 + }, + { + "epoch": 0.948083691826359, + "loss": 0.05409932881593704, + "loss_ce": 0.0038445075042545795, + "loss_iou": 0.2314453125, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 940183012, + "step": 9697 + }, + { + "epoch": 0.9481814626515448, + "grad_norm": 3.226190689719295, + "learning_rate": 5e-05, + "loss": 0.0489, + "num_input_tokens_seen": 940279676, + "step": 9698 + }, + { + "epoch": 0.9481814626515448, + "loss": 0.053589217364788055, + "loss_ce": 0.0027316755149513483, + "loss_iou": 0.2734375, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 940279676, + "step": 9698 + }, + { + "epoch": 0.9482792334767306, + "grad_norm": 14.697396741875632, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 940376152, + "step": 9699 + }, + { + "epoch": 0.9482792334767306, + "loss": 0.05874071270227432, + "loss_ce": 0.00315294461324811, + "loss_iou": 0.2109375, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 940376152, + "step": 9699 + }, + { + "epoch": 0.9483770043019163, + "grad_norm": 14.889984712714943, + "learning_rate": 5e-05, + "loss": 0.0551, + "num_input_tokens_seen": 940473116, + "step": 9700 + }, + { + "epoch": 0.9483770043019163, + "loss": 0.05392385274171829, + "loss_ce": 0.006041772197932005, + "loss_iou": 0.2333984375, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 940473116, + "step": 9700 + }, + { + "epoch": 0.9484747751271021, + "grad_norm": 9.182805236419606, + "learning_rate": 5e-05, + "loss": 0.0547, + "num_input_tokens_seen": 940570372, + "step": 9701 + }, + { + "epoch": 0.9484747751271021, + "loss": 0.057195425033569336, + "loss_ce": 0.00452971737831831, + "loss_iou": 0.2431640625, + "loss_num": 0.010498046875, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 940570372, + "step": 9701 + }, + { + "epoch": 0.9485725459522878, + "grad_norm": 3.5003985215615407, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 940667156, + "step": 9702 + }, + { + "epoch": 0.9485725459522878, + "loss": 0.06218111887574196, + "loss_ce": 0.004731778986752033, + "loss_iou": 0.21875, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 940667156, + "step": 9702 + }, + { + "epoch": 0.9486703167774736, + "grad_norm": 9.965381249470608, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 940764516, + "step": 9703 + }, + { + "epoch": 0.9486703167774736, + "loss": 0.07607556879520416, + "loss_ce": 0.0070905801840126514, + "loss_iou": 0.291015625, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 940764516, + "step": 9703 + }, + { + "epoch": 0.9487680876026594, + "grad_norm": 4.004072174503612, + "learning_rate": 5e-05, + "loss": 0.0718, + "num_input_tokens_seen": 940861388, + "step": 9704 + }, + { + "epoch": 0.9487680876026594, + "loss": 0.052263181656599045, + "loss_ce": 0.005083004012703896, + "loss_iou": 0.4453125, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 940861388, + "step": 9704 + }, + { + "epoch": 0.9488658584278451, + "grad_norm": 3.580203901142979, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 940958448, + "step": 9705 + }, + { + "epoch": 0.9488658584278451, + "loss": 0.08597110956907272, + "loss_ce": 0.005099529400467873, + "loss_iou": 0.396484375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 940958448, + "step": 9705 + }, + { + "epoch": 0.9489636292530309, + "grad_norm": 13.627553589694662, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 941056268, + "step": 9706 + }, + { + "epoch": 0.9489636292530309, + "loss": 0.05069718509912491, + "loss_ce": 0.004531721118837595, + "loss_iou": 0.31640625, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 941056268, + "step": 9706 + }, + { + "epoch": 0.9490614000782167, + "grad_norm": 10.990360262746353, + "learning_rate": 5e-05, + "loss": 0.0727, + "num_input_tokens_seen": 941152400, + "step": 9707 + }, + { + "epoch": 0.9490614000782167, + "loss": 0.08402176201343536, + "loss_ce": 0.004920193925499916, + "loss_iou": 0.31640625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 941152400, + "step": 9707 + }, + { + "epoch": 0.9491591709034024, + "grad_norm": 5.328531363143151, + "learning_rate": 5e-05, + "loss": 0.0714, + "num_input_tokens_seen": 941249480, + "step": 9708 + }, + { + "epoch": 0.9491591709034024, + "loss": 0.05935103818774223, + "loss_ce": 0.002889706054702401, + "loss_iou": 0.314453125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 941249480, + "step": 9708 + }, + { + "epoch": 0.9492569417285882, + "grad_norm": 15.23422734497882, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 941346060, + "step": 9709 + }, + { + "epoch": 0.9492569417285882, + "loss": 0.09581804275512695, + "loss_ce": 0.004593370947986841, + "loss_iou": 0.263671875, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 941346060, + "step": 9709 + }, + { + "epoch": 0.9493547125537739, + "grad_norm": 13.644162136570166, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 941443768, + "step": 9710 + }, + { + "epoch": 0.9493547125537739, + "loss": 0.053636081516742706, + "loss_ce": 0.006074438337236643, + "loss_iou": 0.3125, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 941443768, + "step": 9710 + }, + { + "epoch": 0.9494524833789597, + "grad_norm": 5.348833181936891, + "learning_rate": 5e-05, + "loss": 0.0655, + "num_input_tokens_seen": 941541132, + "step": 9711 + }, + { + "epoch": 0.9494524833789597, + "loss": 0.09037866443395615, + "loss_ce": 0.005539795383810997, + "loss_iou": 0.341796875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 941541132, + "step": 9711 + }, + { + "epoch": 0.9495502542041455, + "grad_norm": 4.45177843746269, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 941638184, + "step": 9712 + }, + { + "epoch": 0.9495502542041455, + "loss": 0.07530216127634048, + "loss_ce": 0.009452858939766884, + "loss_iou": 0.390625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 941638184, + "step": 9712 + }, + { + "epoch": 0.9496480250293312, + "grad_norm": 5.141928504882548, + "learning_rate": 5e-05, + "loss": 0.0958, + "num_input_tokens_seen": 941734748, + "step": 9713 + }, + { + "epoch": 0.9496480250293312, + "loss": 0.11647176742553711, + "loss_ce": 0.0032973322086036205, + "loss_iou": 0.3125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 941734748, + "step": 9713 + }, + { + "epoch": 0.949745795854517, + "grad_norm": 9.907271360844486, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 941831216, + "step": 9714 + }, + { + "epoch": 0.949745795854517, + "loss": 0.0519280880689621, + "loss_ce": 0.0063653443939983845, + "loss_iou": 0.22265625, + "loss_num": 0.00909423828125, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 941831216, + "step": 9714 + }, + { + "epoch": 0.9498435666797028, + "grad_norm": 6.627496305296488, + "learning_rate": 5e-05, + "loss": 0.0505, + "num_input_tokens_seen": 941929052, + "step": 9715 + }, + { + "epoch": 0.9498435666797028, + "loss": 0.058174215257167816, + "loss_ce": 0.0032883514650166035, + "loss_iou": 0.3359375, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 941929052, + "step": 9715 + }, + { + "epoch": 0.9499413375048885, + "grad_norm": 5.7546111141047165, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 942025824, + "step": 9716 + }, + { + "epoch": 0.9499413375048885, + "loss": 0.06768542528152466, + "loss_ce": 0.005826292559504509, + "loss_iou": 0.318359375, + "loss_num": 0.01239013671875, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 942025824, + "step": 9716 + }, + { + "epoch": 0.9500391083300743, + "grad_norm": 10.44771619336282, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 942123972, + "step": 9717 + }, + { + "epoch": 0.9500391083300743, + "loss": 0.09921129047870636, + "loss_ce": 0.005888540297746658, + "loss_iou": 0.333984375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 942123972, + "step": 9717 + }, + { + "epoch": 0.95013687915526, + "grad_norm": 10.636710167426951, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 942220524, + "step": 9718 + }, + { + "epoch": 0.95013687915526, + "loss": 0.06575840711593628, + "loss_ce": 0.0032660355791449547, + "loss_iou": 0.2119140625, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 942220524, + "step": 9718 + }, + { + "epoch": 0.9502346499804458, + "grad_norm": 4.986381817038202, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 942316968, + "step": 9719 + }, + { + "epoch": 0.9502346499804458, + "loss": 0.08260546624660492, + "loss_ce": 0.0024357896763831377, + "loss_iou": 0.21875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 942316968, + "step": 9719 + }, + { + "epoch": 0.9503324208056316, + "grad_norm": 15.433796166889431, + "learning_rate": 5e-05, + "loss": 0.1407, + "num_input_tokens_seen": 942413988, + "step": 9720 + }, + { + "epoch": 0.9503324208056316, + "loss": 0.17333610355854034, + "loss_ce": 0.004146639257669449, + "loss_iou": 0.365234375, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 942413988, + "step": 9720 + }, + { + "epoch": 0.9504301916308173, + "grad_norm": 30.561000471600146, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 942510600, + "step": 9721 + }, + { + "epoch": 0.9504301916308173, + "loss": 0.05649738386273384, + "loss_ce": 0.003953742329031229, + "loss_iou": 0.32421875, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 942510600, + "step": 9721 + }, + { + "epoch": 0.9505279624560031, + "grad_norm": 14.690388982912397, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 942607220, + "step": 9722 + }, + { + "epoch": 0.9505279624560031, + "loss": 0.06805668771266937, + "loss_ce": 0.0055872052907943726, + "loss_iou": 0.275390625, + "loss_num": 0.012451171875, + "loss_xval": 0.0625, + "num_input_tokens_seen": 942607220, + "step": 9722 + }, + { + "epoch": 0.9506257332811889, + "grad_norm": 4.657441160626371, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 942704096, + "step": 9723 + }, + { + "epoch": 0.9506257332811889, + "loss": 0.09019254893064499, + "loss_ce": 0.00435422919690609, + "loss_iou": 0.27734375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 942704096, + "step": 9723 + }, + { + "epoch": 0.9507235041063746, + "grad_norm": 6.212777484092898, + "learning_rate": 5e-05, + "loss": 0.0446, + "num_input_tokens_seen": 942800260, + "step": 9724 + }, + { + "epoch": 0.9507235041063746, + "loss": 0.04175601154565811, + "loss_ce": 0.004211762920022011, + "loss_iou": 0.197265625, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 942800260, + "step": 9724 + }, + { + "epoch": 0.9508212749315604, + "grad_norm": 9.264272608036093, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 942897580, + "step": 9725 + }, + { + "epoch": 0.9508212749315604, + "loss": 0.08824487030506134, + "loss_ce": 0.0038332464173436165, + "loss_iou": 0.314453125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 942897580, + "step": 9725 + }, + { + "epoch": 0.9509190457567462, + "grad_norm": 3.9242151054674643, + "learning_rate": 5e-05, + "loss": 0.0745, + "num_input_tokens_seen": 942995116, + "step": 9726 + }, + { + "epoch": 0.9509190457567462, + "loss": 0.10722807794809341, + "loss_ce": 0.00392225943505764, + "loss_iou": 0.30078125, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 942995116, + "step": 9726 + }, + { + "epoch": 0.9510168165819319, + "grad_norm": 5.283694248972883, + "learning_rate": 5e-05, + "loss": 0.0536, + "num_input_tokens_seen": 943092792, + "step": 9727 + }, + { + "epoch": 0.9510168165819319, + "loss": 0.06923121213912964, + "loss_ce": 0.00407617911696434, + "loss_iou": 0.306640625, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 943092792, + "step": 9727 + }, + { + "epoch": 0.9511145874071177, + "grad_norm": 6.55372168330774, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 943190296, + "step": 9728 + }, + { + "epoch": 0.9511145874071177, + "loss": 0.09276985377073288, + "loss_ce": 0.003277053590863943, + "loss_iou": 0.29296875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 943190296, + "step": 9728 + }, + { + "epoch": 0.9512123582323034, + "grad_norm": 11.592532754418814, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 943287368, + "step": 9729 + }, + { + "epoch": 0.9512123582323034, + "loss": 0.06930019706487656, + "loss_ce": 0.003187679685652256, + "loss_iou": 0.2314453125, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 943287368, + "step": 9729 + }, + { + "epoch": 0.9513101290574892, + "grad_norm": 3.4094702081848016, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 943384672, + "step": 9730 + }, + { + "epoch": 0.9513101290574892, + "loss": 0.038384612649679184, + "loss_ce": 0.005646878853440285, + "loss_iou": 0.26171875, + "loss_num": 0.006561279296875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 943384672, + "step": 9730 + }, + { + "epoch": 0.951407899882675, + "grad_norm": 3.879111632379042, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 943481464, + "step": 9731 + }, + { + "epoch": 0.951407899882675, + "loss": 0.048032816499471664, + "loss_ce": 0.006101662293076515, + "loss_iou": 0.2734375, + "loss_num": 0.00836181640625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 943481464, + "step": 9731 + }, + { + "epoch": 0.9515056707078607, + "grad_norm": 2.551076529363295, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 943578348, + "step": 9732 + }, + { + "epoch": 0.9515056707078607, + "loss": 0.07947301864624023, + "loss_ce": 0.0050711664371192455, + "loss_iou": 0.265625, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 943578348, + "step": 9732 + }, + { + "epoch": 0.9516034415330465, + "grad_norm": 11.555897897801687, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 943675560, + "step": 9733 + }, + { + "epoch": 0.9516034415330465, + "loss": 0.05500427260994911, + "loss_ce": 0.003109131008386612, + "loss_iou": 0.361328125, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 943675560, + "step": 9733 + }, + { + "epoch": 0.9517012123582324, + "grad_norm": 6.1563511525801315, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 943771944, + "step": 9734 + }, + { + "epoch": 0.9517012123582324, + "loss": 0.1125105768442154, + "loss_ce": 0.010455988347530365, + "loss_iou": 0.1943359375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 943771944, + "step": 9734 + }, + { + "epoch": 0.951798983183418, + "grad_norm": 8.723144409432374, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 943867796, + "step": 9735 + }, + { + "epoch": 0.951798983183418, + "loss": 0.08180207014083862, + "loss_ce": 0.006190961692482233, + "loss_iou": 0.25390625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 943867796, + "step": 9735 + }, + { + "epoch": 0.9518967540086039, + "grad_norm": 4.727184363926456, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 943965080, + "step": 9736 + }, + { + "epoch": 0.9518967540086039, + "loss": 0.10552197694778442, + "loss_ce": 0.006904426496475935, + "loss_iou": 0.263671875, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 943965080, + "step": 9736 + }, + { + "epoch": 0.9519945248337895, + "grad_norm": 18.96936893517589, + "learning_rate": 5e-05, + "loss": 0.0626, + "num_input_tokens_seen": 944060680, + "step": 9737 + }, + { + "epoch": 0.9519945248337895, + "loss": 0.06912234425544739, + "loss_ce": 0.0026169149205088615, + "loss_iou": 0.2734375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 944060680, + "step": 9737 + }, + { + "epoch": 0.9520922956589754, + "grad_norm": 40.53402708809422, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 944158364, + "step": 9738 + }, + { + "epoch": 0.9520922956589754, + "loss": 0.09572159498929977, + "loss_ce": 0.0038102762773633003, + "loss_iou": 0.265625, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 944158364, + "step": 9738 + }, + { + "epoch": 0.9521900664841612, + "grad_norm": 10.105285935625645, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 944255688, + "step": 9739 + }, + { + "epoch": 0.9521900664841612, + "loss": 0.08246386796236038, + "loss_ce": 0.005986818112432957, + "loss_iou": 0.2421875, + "loss_num": 0.0152587890625, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 944255688, + "step": 9739 + }, + { + "epoch": 0.9522878373093469, + "grad_norm": 34.85226775227918, + "learning_rate": 5e-05, + "loss": 0.1128, + "num_input_tokens_seen": 944352512, + "step": 9740 + }, + { + "epoch": 0.9522878373093469, + "loss": 0.1256864368915558, + "loss_ce": 0.0051877787336707115, + "loss_iou": 0.294921875, + "loss_num": 0.0240478515625, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 944352512, + "step": 9740 + }, + { + "epoch": 0.9523856081345327, + "grad_norm": 6.566840271821642, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 944449644, + "step": 9741 + }, + { + "epoch": 0.9523856081345327, + "loss": 0.0803205817937851, + "loss_ce": 0.005811912007629871, + "loss_iou": 0.33984375, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 944449644, + "step": 9741 + }, + { + "epoch": 0.9524833789597185, + "grad_norm": 46.63108802283725, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 944547456, + "step": 9742 + }, + { + "epoch": 0.9524833789597185, + "loss": 0.12220847606658936, + "loss_ce": 0.0036400617100298405, + "loss_iou": 0.3515625, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 944547456, + "step": 9742 + }, + { + "epoch": 0.9525811497849042, + "grad_norm": 20.84831304556672, + "learning_rate": 5e-05, + "loss": 0.0995, + "num_input_tokens_seen": 944644244, + "step": 9743 + }, + { + "epoch": 0.9525811497849042, + "loss": 0.08743087947368622, + "loss_ce": 0.004629057366400957, + "loss_iou": 0.3828125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 944644244, + "step": 9743 + }, + { + "epoch": 0.95267892061009, + "grad_norm": 7.3847874235497075, + "learning_rate": 5e-05, + "loss": 0.1105, + "num_input_tokens_seen": 944740420, + "step": 9744 + }, + { + "epoch": 0.95267892061009, + "loss": 0.10404205322265625, + "loss_ce": 0.005012514069676399, + "loss_iou": 0.333984375, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 944740420, + "step": 9744 + }, + { + "epoch": 0.9527766914352758, + "grad_norm": 4.520005576726863, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 944836856, + "step": 9745 + }, + { + "epoch": 0.9527766914352758, + "loss": 0.05088307708501816, + "loss_ce": 0.0033900965936481953, + "loss_iou": 0.1962890625, + "loss_num": 0.00946044921875, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 944836856, + "step": 9745 + }, + { + "epoch": 0.9528744622604615, + "grad_norm": 5.658716735823713, + "learning_rate": 5e-05, + "loss": 0.0845, + "num_input_tokens_seen": 944934516, + "step": 9746 + }, + { + "epoch": 0.9528744622604615, + "loss": 0.07750917971134186, + "loss_ce": 0.002924215281382203, + "loss_iou": 0.361328125, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 944934516, + "step": 9746 + }, + { + "epoch": 0.9529722330856473, + "grad_norm": 4.311229549672704, + "learning_rate": 5e-05, + "loss": 0.0881, + "num_input_tokens_seen": 945032168, + "step": 9747 + }, + { + "epoch": 0.9529722330856473, + "loss": 0.07122991979122162, + "loss_ce": 0.004732110537588596, + "loss_iou": 0.279296875, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 945032168, + "step": 9747 + }, + { + "epoch": 0.953070003910833, + "grad_norm": 5.84508477867218, + "learning_rate": 5e-05, + "loss": 0.0743, + "num_input_tokens_seen": 945129152, + "step": 9748 + }, + { + "epoch": 0.953070003910833, + "loss": 0.07113848626613617, + "loss_ce": 0.00393115496262908, + "loss_iou": 0.14453125, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 945129152, + "step": 9748 + }, + { + "epoch": 0.9531677747360188, + "grad_norm": 3.709547622815805, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 945226124, + "step": 9749 + }, + { + "epoch": 0.9531677747360188, + "loss": 0.08709172904491425, + "loss_ce": 0.004877367056906223, + "loss_iou": 0.2119140625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 945226124, + "step": 9749 + }, + { + "epoch": 0.9532655455612046, + "grad_norm": 9.882066738029692, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 945322452, + "step": 9750 + }, + { + "epoch": 0.9532655455612046, + "eval_seeclick_CIoU": 0.6319297552108765, + "eval_seeclick_GIoU": 0.6317127048969269, + "eval_seeclick_IoU": 0.6602177917957306, + "eval_seeclick_MAE_all": 0.05440343730151653, + "eval_seeclick_MAE_h": 0.026866359636187553, + "eval_seeclick_MAE_w": 0.07924913242459297, + "eval_seeclick_MAE_x": 0.08733964338898659, + "eval_seeclick_MAE_y": 0.02415861329063773, + "eval_seeclick_NUM_probability": 0.9999994039535522, + "eval_seeclick_inside_bbox": 0.890625, + "eval_seeclick_loss": 0.22266656160354614, + "eval_seeclick_loss_ce": 0.009838145226240158, + "eval_seeclick_loss_iou": 0.39703369140625, + "eval_seeclick_loss_num": 0.0434112548828125, + "eval_seeclick_loss_xval": 0.2171630859375, + "eval_seeclick_runtime": 75.4498, + "eval_seeclick_samples_per_second": 0.57, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 945322452, + "step": 9750 + }, + { + "epoch": 0.9532655455612046, + "eval_icons_CIoU": 0.6730327606201172, + "eval_icons_GIoU": 0.6756770610809326, + "eval_icons_IoU": 0.7041727304458618, + "eval_icons_MAE_all": 0.05918102525174618, + "eval_icons_MAE_h": 0.07148963026702404, + "eval_icons_MAE_w": 0.04823835380375385, + "eval_icons_MAE_x": 0.04788687638938427, + "eval_icons_MAE_y": 0.06910926103591919, + "eval_icons_NUM_probability": 0.9999990165233612, + "eval_icons_inside_bbox": 0.8194444477558136, + "eval_icons_loss": 0.18413197994232178, + "eval_icons_loss_ce": 1.080702134004241e-06, + "eval_icons_loss_iou": 0.38885498046875, + "eval_icons_loss_num": 0.03922462463378906, + "eval_icons_loss_xval": 0.1960601806640625, + "eval_icons_runtime": 98.3835, + "eval_icons_samples_per_second": 0.508, + "eval_icons_steps_per_second": 0.02, + "num_input_tokens_seen": 945322452, + "step": 9750 + }, + { + "epoch": 0.9532655455612046, + "eval_screenspot_CIoU": 0.3078445643186569, + "eval_screenspot_GIoU": 0.2861725812156995, + "eval_screenspot_IoU": 0.40359564622243244, + "eval_screenspot_MAE_all": 0.16663232694069544, + "eval_screenspot_MAE_h": 0.13129933923482895, + "eval_screenspot_MAE_w": 0.19906522085269293, + "eval_screenspot_MAE_x": 0.2135541538397471, + "eval_screenspot_MAE_y": 0.12261057645082474, + "eval_screenspot_NUM_probability": 0.999997615814209, + "eval_screenspot_inside_bbox": 0.637500007947286, + "eval_screenspot_loss": 0.5749945044517517, + "eval_screenspot_loss_ce": 0.013906359982987246, + "eval_screenspot_loss_iou": 0.3511555989583333, + "eval_screenspot_loss_num": 0.115234375, + "eval_screenspot_loss_xval": 0.576416015625, + "eval_screenspot_runtime": 153.6683, + "eval_screenspot_samples_per_second": 0.579, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 945322452, + "step": 9750 + }, + { + "epoch": 0.9532655455612046, + "eval_compot_CIoU": 0.39180782437324524, + "eval_compot_GIoU": 0.383696049451828, + "eval_compot_IoU": 0.48041655123233795, + "eval_compot_MAE_all": 0.10453876107931137, + "eval_compot_MAE_h": 0.09726579859852791, + "eval_compot_MAE_w": 0.11238951236009598, + "eval_compot_MAE_x": 0.10832051187753677, + "eval_compot_MAE_y": 0.10017919912934303, + "eval_compot_NUM_probability": 0.9999960660934448, + "eval_compot_inside_bbox": 0.6527777910232544, + "eval_compot_loss": 0.31885719299316406, + "eval_compot_loss_ce": 0.026074156165122986, + "eval_compot_loss_iou": 0.4591064453125, + "eval_compot_loss_num": 0.0511474609375, + "eval_compot_loss_xval": 0.255950927734375, + "eval_compot_runtime": 86.0432, + "eval_compot_samples_per_second": 0.581, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 945322452, + "step": 9750 + }, + { + "epoch": 0.9532655455612046, + "loss": 0.2813102602958679, + "loss_ce": 0.018462374806404114, + "loss_iou": 0.46875, + "loss_num": 0.052490234375, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 945322452, + "step": 9750 + }, + { + "epoch": 0.9533633163863903, + "grad_norm": 14.92637122480149, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 945419548, + "step": 9751 + }, + { + "epoch": 0.9533633163863903, + "loss": 0.051068685948848724, + "loss_ce": 0.0032323843333870173, + "loss_iou": 0.3125, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 945419548, + "step": 9751 + }, + { + "epoch": 0.9534610872115761, + "grad_norm": 2.521115470451947, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 945516600, + "step": 9752 + }, + { + "epoch": 0.9534610872115761, + "loss": 0.12081839144229889, + "loss_ce": 0.0058891866356134415, + "loss_iou": 0.314453125, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 945516600, + "step": 9752 + }, + { + "epoch": 0.9535588580367619, + "grad_norm": 4.8212927054695, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 945613696, + "step": 9753 + }, + { + "epoch": 0.9535588580367619, + "loss": 0.07971261441707611, + "loss_ce": 0.004730928689241409, + "loss_iou": 0.267578125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 945613696, + "step": 9753 + }, + { + "epoch": 0.9536566288619476, + "grad_norm": 2.6330585751514666, + "learning_rate": 5e-05, + "loss": 0.063, + "num_input_tokens_seen": 945710216, + "step": 9754 + }, + { + "epoch": 0.9536566288619476, + "loss": 0.061636921018362045, + "loss_ce": 0.00421809870749712, + "loss_iou": 0.296875, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 945710216, + "step": 9754 + }, + { + "epoch": 0.9537543996871334, + "grad_norm": 2.826152515968904, + "learning_rate": 5e-05, + "loss": 0.0851, + "num_input_tokens_seen": 945806856, + "step": 9755 + }, + { + "epoch": 0.9537543996871334, + "loss": 0.08374551683664322, + "loss_ce": 0.004689732566475868, + "loss_iou": 0.2373046875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 945806856, + "step": 9755 + }, + { + "epoch": 0.9538521705123191, + "grad_norm": 4.086561572932803, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 945904656, + "step": 9756 + }, + { + "epoch": 0.9538521705123191, + "loss": 0.061313480138778687, + "loss_ce": 0.011295165866613388, + "loss_iou": 0.2255859375, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 945904656, + "step": 9756 + }, + { + "epoch": 0.9539499413375049, + "grad_norm": 7.668995415789187, + "learning_rate": 5e-05, + "loss": 0.0469, + "num_input_tokens_seen": 946001484, + "step": 9757 + }, + { + "epoch": 0.9539499413375049, + "loss": 0.07193562388420105, + "loss_ce": 0.006231273058801889, + "loss_iou": 0.2734375, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 946001484, + "step": 9757 + }, + { + "epoch": 0.9540477121626907, + "grad_norm": 2.8900383741607687, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 946098156, + "step": 9758 + }, + { + "epoch": 0.9540477121626907, + "loss": 0.056748032569885254, + "loss_ce": 0.002869250951334834, + "loss_iou": 0.3125, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 946098156, + "step": 9758 + }, + { + "epoch": 0.9541454829878764, + "grad_norm": 12.934176656649731, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 946195264, + "step": 9759 + }, + { + "epoch": 0.9541454829878764, + "loss": 0.12125033140182495, + "loss_ce": 0.006778899114578962, + "loss_iou": 0.255859375, + "loss_num": 0.02294921875, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 946195264, + "step": 9759 + }, + { + "epoch": 0.9542432538130622, + "grad_norm": 13.790405529572794, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 946292640, + "step": 9760 + }, + { + "epoch": 0.9542432538130622, + "loss": 0.05949254333972931, + "loss_ce": 0.006437734700739384, + "loss_iou": 0.39453125, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 946292640, + "step": 9760 + }, + { + "epoch": 0.954341024638248, + "grad_norm": 16.316579090520012, + "learning_rate": 5e-05, + "loss": 0.0601, + "num_input_tokens_seen": 946389344, + "step": 9761 + }, + { + "epoch": 0.954341024638248, + "loss": 0.05985002592206001, + "loss_ce": 0.0034611716400831938, + "loss_iou": 0.2080078125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 946389344, + "step": 9761 + }, + { + "epoch": 0.9544387954634337, + "grad_norm": 8.353022550749808, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 946486048, + "step": 9762 + }, + { + "epoch": 0.9544387954634337, + "loss": 0.10679302364587784, + "loss_ce": 0.005337337497621775, + "loss_iou": 0.220703125, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 946486048, + "step": 9762 + }, + { + "epoch": 0.9545365662886195, + "grad_norm": 4.548368701651988, + "learning_rate": 5e-05, + "loss": 0.0787, + "num_input_tokens_seen": 946582908, + "step": 9763 + }, + { + "epoch": 0.9545365662886195, + "loss": 0.09371119737625122, + "loss_ce": 0.008826554752886295, + "loss_iou": 0.212890625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 946582908, + "step": 9763 + }, + { + "epoch": 0.9546343371138052, + "grad_norm": 5.706872528598309, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 946679812, + "step": 9764 + }, + { + "epoch": 0.9546343371138052, + "loss": 0.09171447157859802, + "loss_ce": 0.009202574379742146, + "loss_iou": 0.169921875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 946679812, + "step": 9764 + }, + { + "epoch": 0.954732107938991, + "grad_norm": 12.547920091275449, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 946775652, + "step": 9765 + }, + { + "epoch": 0.954732107938991, + "loss": 0.07438141107559204, + "loss_ce": 0.004999702330678701, + "loss_iou": 0.228515625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 946775652, + "step": 9765 + }, + { + "epoch": 0.9548298787641768, + "grad_norm": 4.667829456994622, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 946872640, + "step": 9766 + }, + { + "epoch": 0.9548298787641768, + "loss": 0.08788204193115234, + "loss_ce": 0.0018072081729769707, + "loss_iou": 0.20703125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 946872640, + "step": 9766 + }, + { + "epoch": 0.9549276495893625, + "grad_norm": 6.62861379263571, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 946969000, + "step": 9767 + }, + { + "epoch": 0.9549276495893625, + "loss": 0.07328134030103683, + "loss_ce": 0.005150844808667898, + "loss_iou": 0.328125, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 946969000, + "step": 9767 + }, + { + "epoch": 0.9550254204145483, + "grad_norm": 7.9640917815057515, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 947065672, + "step": 9768 + }, + { + "epoch": 0.9550254204145483, + "loss": 0.05587610602378845, + "loss_ce": 0.0019591706804931164, + "loss_iou": 0.234375, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 947065672, + "step": 9768 + }, + { + "epoch": 0.9551231912397341, + "grad_norm": 19.765549450499684, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 947163404, + "step": 9769 + }, + { + "epoch": 0.9551231912397341, + "loss": 0.07938539236783981, + "loss_ce": 0.007608048617839813, + "loss_iou": 0.279296875, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 947163404, + "step": 9769 + }, + { + "epoch": 0.9552209620649198, + "grad_norm": 8.342941512352075, + "learning_rate": 5e-05, + "loss": 0.0644, + "num_input_tokens_seen": 947260092, + "step": 9770 + }, + { + "epoch": 0.9552209620649198, + "loss": 0.05477648600935936, + "loss_ce": 0.007324510719627142, + "loss_iou": 0.33984375, + "loss_num": 0.009521484375, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 947260092, + "step": 9770 + }, + { + "epoch": 0.9553187328901056, + "grad_norm": 3.2986957037103286, + "learning_rate": 5e-05, + "loss": 0.0956, + "num_input_tokens_seen": 947357132, + "step": 9771 + }, + { + "epoch": 0.9553187328901056, + "loss": 0.14362752437591553, + "loss_ce": 0.006573083344846964, + "loss_iou": 0.2470703125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 947357132, + "step": 9771 + }, + { + "epoch": 0.9554165037152914, + "grad_norm": 7.6758942155801355, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 947455380, + "step": 9772 + }, + { + "epoch": 0.9554165037152914, + "loss": 0.05607728660106659, + "loss_ce": 0.002396866912022233, + "loss_iou": 0.390625, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 947455380, + "step": 9772 + }, + { + "epoch": 0.9555142745404771, + "grad_norm": 10.943195362523818, + "learning_rate": 5e-05, + "loss": 0.0479, + "num_input_tokens_seen": 947552808, + "step": 9773 + }, + { + "epoch": 0.9555142745404771, + "loss": 0.06178382411599159, + "loss_ce": 0.0019083358347415924, + "loss_iou": 0.384765625, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 947552808, + "step": 9773 + }, + { + "epoch": 0.9556120453656629, + "grad_norm": 13.151969579905833, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 947649928, + "step": 9774 + }, + { + "epoch": 0.9556120453656629, + "loss": 0.11487086117267609, + "loss_ce": 0.0008877089712768793, + "loss_iou": 0.3125, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 947649928, + "step": 9774 + }, + { + "epoch": 0.9557098161908486, + "grad_norm": 12.944490052342292, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 947746796, + "step": 9775 + }, + { + "epoch": 0.9557098161908486, + "loss": 0.062165092676877975, + "loss_ce": 0.0057533495128154755, + "loss_iou": 0.310546875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 947746796, + "step": 9775 + }, + { + "epoch": 0.9558075870160344, + "grad_norm": 20.76484735294059, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 947844944, + "step": 9776 + }, + { + "epoch": 0.9558075870160344, + "loss": 0.04574362561106682, + "loss_ce": 0.005674044135957956, + "loss_iou": 0.369140625, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 947844944, + "step": 9776 + }, + { + "epoch": 0.9559053578412202, + "grad_norm": 14.556394557448357, + "learning_rate": 5e-05, + "loss": 0.0498, + "num_input_tokens_seen": 947942160, + "step": 9777 + }, + { + "epoch": 0.9559053578412202, + "loss": 0.03670661896467209, + "loss_ce": 0.0030609900131821632, + "loss_iou": 0.28515625, + "loss_num": 0.0067138671875, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 947942160, + "step": 9777 + }, + { + "epoch": 0.9560031286664059, + "grad_norm": 11.59758102181686, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 948039308, + "step": 9778 + }, + { + "epoch": 0.9560031286664059, + "loss": 0.07917962968349457, + "loss_ce": 0.0069139995612204075, + "loss_iou": 0.302734375, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 948039308, + "step": 9778 + }, + { + "epoch": 0.9561008994915917, + "grad_norm": 13.087527428356603, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 948135712, + "step": 9779 + }, + { + "epoch": 0.9561008994915917, + "loss": 0.0460980199277401, + "loss_ce": 0.004746702499687672, + "loss_iou": 0.2490234375, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 948135712, + "step": 9779 + }, + { + "epoch": 0.9561986703167775, + "grad_norm": 39.45892385467232, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 948233612, + "step": 9780 + }, + { + "epoch": 0.9561986703167775, + "loss": 0.03701009973883629, + "loss_ce": 0.0016707411268725991, + "loss_iou": 0.279296875, + "loss_num": 0.007080078125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 948233612, + "step": 9780 + }, + { + "epoch": 0.9562964411419632, + "grad_norm": 6.739456092401778, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 948330424, + "step": 9781 + }, + { + "epoch": 0.9562964411419632, + "loss": 0.07247646152973175, + "loss_ce": 0.0037508823443204165, + "loss_iou": 0.28515625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 948330424, + "step": 9781 + }, + { + "epoch": 0.956394211967149, + "grad_norm": 14.403105805055285, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 948427424, + "step": 9782 + }, + { + "epoch": 0.956394211967149, + "loss": 0.07214930653572083, + "loss_ce": 0.008649855852127075, + "loss_iou": 0.220703125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 948427424, + "step": 9782 + }, + { + "epoch": 0.9564919827923347, + "grad_norm": 8.915943510917456, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 948524688, + "step": 9783 + }, + { + "epoch": 0.9564919827923347, + "loss": 0.09030900150537491, + "loss_ce": 0.004493569955229759, + "loss_iou": 0.302734375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 948524688, + "step": 9783 + }, + { + "epoch": 0.9565897536175205, + "grad_norm": 21.9229611144564, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 948622684, + "step": 9784 + }, + { + "epoch": 0.9565897536175205, + "loss": 0.08420464396476746, + "loss_ce": 0.006339039653539658, + "loss_iou": 0.33984375, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 948622684, + "step": 9784 + }, + { + "epoch": 0.9566875244427063, + "grad_norm": 7.05745279735086, + "learning_rate": 5e-05, + "loss": 0.0478, + "num_input_tokens_seen": 948719132, + "step": 9785 + }, + { + "epoch": 0.9566875244427063, + "loss": 0.042663101106882095, + "loss_ce": 0.003131394274532795, + "loss_iou": 0.2734375, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 948719132, + "step": 9785 + }, + { + "epoch": 0.956785295267892, + "grad_norm": 2.4420669124493126, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 948815720, + "step": 9786 + }, + { + "epoch": 0.956785295267892, + "loss": 0.05176912248134613, + "loss_ce": 0.00725923478603363, + "loss_iou": 0.29296875, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 948815720, + "step": 9786 + }, + { + "epoch": 0.9568830660930778, + "grad_norm": 3.18865458053745, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 948912056, + "step": 9787 + }, + { + "epoch": 0.9568830660930778, + "loss": 0.06952133774757385, + "loss_ce": 0.0069908141158521175, + "loss_iou": 0.265625, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 948912056, + "step": 9787 + }, + { + "epoch": 0.9569808369182636, + "grad_norm": 11.616502608772324, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 949009924, + "step": 9788 + }, + { + "epoch": 0.9569808369182636, + "loss": 0.08241765946149826, + "loss_ce": 0.00630682148039341, + "loss_iou": 0.291015625, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 949009924, + "step": 9788 + }, + { + "epoch": 0.9570786077434493, + "grad_norm": 13.103104482340457, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 949105764, + "step": 9789 + }, + { + "epoch": 0.9570786077434493, + "loss": 0.09152630716562271, + "loss_ce": 0.003032960928976536, + "loss_iou": 0.2255859375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 949105764, + "step": 9789 + }, + { + "epoch": 0.9571763785686351, + "grad_norm": 11.895216184459105, + "learning_rate": 5e-05, + "loss": 0.0865, + "num_input_tokens_seen": 949202908, + "step": 9790 + }, + { + "epoch": 0.9571763785686351, + "loss": 0.12030157446861267, + "loss_ce": 0.01098761335015297, + "loss_iou": 0.29296875, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 949202908, + "step": 9790 + }, + { + "epoch": 0.9572741493938209, + "grad_norm": 2.3825580601051364, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 949300156, + "step": 9791 + }, + { + "epoch": 0.9572741493938209, + "loss": 0.05490095168352127, + "loss_ce": 0.003648588666692376, + "loss_iou": 0.28515625, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 949300156, + "step": 9791 + }, + { + "epoch": 0.9573719202190066, + "grad_norm": 6.941633369853311, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 949396608, + "step": 9792 + }, + { + "epoch": 0.9573719202190066, + "loss": 0.06174634397029877, + "loss_ce": 0.008279546163976192, + "loss_iou": 0.2314453125, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 949396608, + "step": 9792 + }, + { + "epoch": 0.9574696910441924, + "grad_norm": 6.614048647917483, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 949493280, + "step": 9793 + }, + { + "epoch": 0.9574696910441924, + "loss": 0.06456296145915985, + "loss_ce": 0.0029756291769444942, + "loss_iou": 0.30859375, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 949493280, + "step": 9793 + }, + { + "epoch": 0.9575674618693781, + "grad_norm": 7.536590087011629, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 949589480, + "step": 9794 + }, + { + "epoch": 0.9575674618693781, + "loss": 0.08362608402967453, + "loss_ce": 0.005302722565829754, + "loss_iou": 0.310546875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 949589480, + "step": 9794 + }, + { + "epoch": 0.9576652326945639, + "grad_norm": 2.8179751836845885, + "learning_rate": 5e-05, + "loss": 0.0529, + "num_input_tokens_seen": 949685508, + "step": 9795 + }, + { + "epoch": 0.9576652326945639, + "loss": 0.05704805999994278, + "loss_ce": 0.003283717203885317, + "loss_iou": 0.2216796875, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 949685508, + "step": 9795 + }, + { + "epoch": 0.9577630035197497, + "grad_norm": 8.2243751693448, + "learning_rate": 5e-05, + "loss": 0.1164, + "num_input_tokens_seen": 949783084, + "step": 9796 + }, + { + "epoch": 0.9577630035197497, + "loss": 0.11987851560115814, + "loss_ce": 0.006566743366420269, + "loss_iou": 0.298828125, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 949783084, + "step": 9796 + }, + { + "epoch": 0.9578607743449354, + "grad_norm": 6.393433550996918, + "learning_rate": 5e-05, + "loss": 0.0566, + "num_input_tokens_seen": 949879936, + "step": 9797 + }, + { + "epoch": 0.9578607743449354, + "loss": 0.058706801384687424, + "loss_ce": 0.005026381462812424, + "loss_iou": 0.224609375, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 949879936, + "step": 9797 + }, + { + "epoch": 0.9579585451701212, + "grad_norm": 4.0536938097855355, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 949976076, + "step": 9798 + }, + { + "epoch": 0.9579585451701212, + "loss": 0.05095963925123215, + "loss_ce": 0.006693891249597073, + "loss_iou": 0.2890625, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 949976076, + "step": 9798 + }, + { + "epoch": 0.958056315995307, + "grad_norm": 9.139390111150329, + "learning_rate": 5e-05, + "loss": 0.0474, + "num_input_tokens_seen": 950073596, + "step": 9799 + }, + { + "epoch": 0.958056315995307, + "loss": 0.05541067197918892, + "loss_ce": 0.0036299708299338818, + "loss_iou": 0.32421875, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 950073596, + "step": 9799 + }, + { + "epoch": 0.9581540868204927, + "grad_norm": 12.85629100083569, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 950169792, + "step": 9800 + }, + { + "epoch": 0.9581540868204927, + "loss": 0.1001947745680809, + "loss_ce": 0.007390816695988178, + "loss_iou": 0.25, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 950169792, + "step": 9800 + }, + { + "epoch": 0.9582518576456786, + "grad_norm": 3.322098955867376, + "learning_rate": 5e-05, + "loss": 0.073, + "num_input_tokens_seen": 950266840, + "step": 9801 + }, + { + "epoch": 0.9582518576456786, + "loss": 0.08537209033966064, + "loss_ce": 0.0037833419628441334, + "loss_iou": 0.2490234375, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 950266840, + "step": 9801 + }, + { + "epoch": 0.9583496284708642, + "grad_norm": 2.7113061081056227, + "learning_rate": 5e-05, + "loss": 0.0394, + "num_input_tokens_seen": 950364284, + "step": 9802 + }, + { + "epoch": 0.9583496284708642, + "loss": 0.03826802968978882, + "loss_ce": 0.001555385417304933, + "loss_iou": 0.263671875, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 950364284, + "step": 9802 + }, + { + "epoch": 0.95844739929605, + "grad_norm": 4.634538720682775, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 950460684, + "step": 9803 + }, + { + "epoch": 0.95844739929605, + "loss": 0.062177058309316635, + "loss_ce": 0.004666680470108986, + "loss_iou": 0.30859375, + "loss_num": 0.011474609375, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 950460684, + "step": 9803 + }, + { + "epoch": 0.9585451701212359, + "grad_norm": 8.225306948445581, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 950556836, + "step": 9804 + }, + { + "epoch": 0.9585451701212359, + "loss": 0.05214444547891617, + "loss_ce": 0.009190955199301243, + "loss_iou": 0.32421875, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 950556836, + "step": 9804 + }, + { + "epoch": 0.9586429409464216, + "grad_norm": 28.811358195949687, + "learning_rate": 5e-05, + "loss": 0.0479, + "num_input_tokens_seen": 950652696, + "step": 9805 + }, + { + "epoch": 0.9586429409464216, + "loss": 0.0467672273516655, + "loss_ce": 0.0038747675716876984, + "loss_iou": 0.298828125, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 950652696, + "step": 9805 + }, + { + "epoch": 0.9587407117716074, + "grad_norm": 17.449961171377048, + "learning_rate": 5e-05, + "loss": 0.0727, + "num_input_tokens_seen": 950749096, + "step": 9806 + }, + { + "epoch": 0.9587407117716074, + "loss": 0.06917775422334671, + "loss_ce": 0.0020848552230745554, + "loss_iou": 0.345703125, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 950749096, + "step": 9806 + }, + { + "epoch": 0.9588384825967932, + "grad_norm": 4.327412179091511, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 950846076, + "step": 9807 + }, + { + "epoch": 0.9588384825967932, + "loss": 0.0765313133597374, + "loss_ce": 0.003258608980104327, + "loss_iou": 0.197265625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 950846076, + "step": 9807 + }, + { + "epoch": 0.9589362534219789, + "grad_norm": 6.265501312345731, + "learning_rate": 5e-05, + "loss": 0.0527, + "num_input_tokens_seen": 950943788, + "step": 9808 + }, + { + "epoch": 0.9589362534219789, + "loss": 0.05199814587831497, + "loss_ce": 0.004772193729877472, + "loss_iou": 0.306640625, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 950943788, + "step": 9808 + }, + { + "epoch": 0.9590340242471647, + "grad_norm": 3.8752888130323604, + "learning_rate": 5e-05, + "loss": 0.0445, + "num_input_tokens_seen": 951040328, + "step": 9809 + }, + { + "epoch": 0.9590340242471647, + "loss": 0.047955222427845, + "loss_ce": 0.0017134591471403837, + "loss_iou": 0.28515625, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 951040328, + "step": 9809 + }, + { + "epoch": 0.9591317950723504, + "grad_norm": 3.1680363946767485, + "learning_rate": 5e-05, + "loss": 0.1014, + "num_input_tokens_seen": 951136724, + "step": 9810 + }, + { + "epoch": 0.9591317950723504, + "loss": 0.09496469795703888, + "loss_ce": 0.005120948888361454, + "loss_iou": 0.2451171875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 951136724, + "step": 9810 + }, + { + "epoch": 0.9592295658975362, + "grad_norm": 2.1428324305907576, + "learning_rate": 5e-05, + "loss": 0.0523, + "num_input_tokens_seen": 951233212, + "step": 9811 + }, + { + "epoch": 0.9592295658975362, + "loss": 0.05275043845176697, + "loss_ce": 0.004059640225023031, + "loss_iou": 0.25, + "loss_num": 0.009765625, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 951233212, + "step": 9811 + }, + { + "epoch": 0.959327336722722, + "grad_norm": 6.668461907538247, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 951330600, + "step": 9812 + }, + { + "epoch": 0.959327336722722, + "loss": 0.061352234333753586, + "loss_ce": 0.0016293360386043787, + "loss_iou": 0.30078125, + "loss_num": 0.01190185546875, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 951330600, + "step": 9812 + }, + { + "epoch": 0.9594251075479077, + "grad_norm": 6.912785818233467, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 951427108, + "step": 9813 + }, + { + "epoch": 0.9594251075479077, + "loss": 0.033858224749565125, + "loss_ce": 0.0024632634595036507, + "loss_iou": 0.25, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 951427108, + "step": 9813 + }, + { + "epoch": 0.9595228783730935, + "grad_norm": 3.673379938752075, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 951524368, + "step": 9814 + }, + { + "epoch": 0.9595228783730935, + "loss": 0.06948723644018173, + "loss_ce": 0.0045935120433568954, + "loss_iou": 0.23828125, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 951524368, + "step": 9814 + }, + { + "epoch": 0.9596206491982793, + "grad_norm": 7.6131565516860755, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 951621028, + "step": 9815 + }, + { + "epoch": 0.9596206491982793, + "loss": 0.061618342995643616, + "loss_ce": 0.00609923992305994, + "loss_iou": 0.216796875, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 951621028, + "step": 9815 + }, + { + "epoch": 0.959718420023465, + "grad_norm": 9.356973529663474, + "learning_rate": 5e-05, + "loss": 0.0675, + "num_input_tokens_seen": 951718008, + "step": 9816 + }, + { + "epoch": 0.959718420023465, + "loss": 0.06863131374120712, + "loss_ce": 0.008023400790989399, + "loss_iou": 0.35546875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 951718008, + "step": 9816 + }, + { + "epoch": 0.9598161908486508, + "grad_norm": 6.69397924822103, + "learning_rate": 5e-05, + "loss": 0.0646, + "num_input_tokens_seen": 951814420, + "step": 9817 + }, + { + "epoch": 0.9598161908486508, + "loss": 0.07289673388004303, + "loss_ce": 0.0027701943181455135, + "loss_iou": 0.185546875, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 951814420, + "step": 9817 + }, + { + "epoch": 0.9599139616738366, + "grad_norm": 37.928587594573635, + "learning_rate": 5e-05, + "loss": 0.0493, + "num_input_tokens_seen": 951911280, + "step": 9818 + }, + { + "epoch": 0.9599139616738366, + "loss": 0.04755006358027458, + "loss_ce": 0.0016974018653854728, + "loss_iou": 0.25, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 951911280, + "step": 9818 + }, + { + "epoch": 0.9600117324990223, + "grad_norm": 58.48641997845404, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 952008672, + "step": 9819 + }, + { + "epoch": 0.9600117324990223, + "loss": 0.12073716521263123, + "loss_ce": 0.008882615715265274, + "loss_iou": 0.3359375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 952008672, + "step": 9819 + }, + { + "epoch": 0.9601095033242081, + "grad_norm": 13.448465304841298, + "learning_rate": 5e-05, + "loss": 0.0488, + "num_input_tokens_seen": 952106132, + "step": 9820 + }, + { + "epoch": 0.9601095033242081, + "loss": 0.04154832661151886, + "loss_ce": 0.004484730772674084, + "loss_iou": 0.255859375, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 952106132, + "step": 9820 + }, + { + "epoch": 0.9602072741493938, + "grad_norm": 5.765171258699122, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 952203104, + "step": 9821 + }, + { + "epoch": 0.9602072741493938, + "loss": 0.0844498872756958, + "loss_ce": 0.005821350496262312, + "loss_iou": 0.322265625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 952203104, + "step": 9821 + }, + { + "epoch": 0.9603050449745796, + "grad_norm": 27.832922759893272, + "learning_rate": 5e-05, + "loss": 0.1009, + "num_input_tokens_seen": 952299884, + "step": 9822 + }, + { + "epoch": 0.9603050449745796, + "loss": 0.0723189264535904, + "loss_ce": 0.004630933981388807, + "loss_iou": 0.23046875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 952299884, + "step": 9822 + }, + { + "epoch": 0.9604028157997654, + "grad_norm": 7.934269446313897, + "learning_rate": 5e-05, + "loss": 0.0921, + "num_input_tokens_seen": 952396312, + "step": 9823 + }, + { + "epoch": 0.9604028157997654, + "loss": 0.10834087431430817, + "loss_ce": 0.007465027272701263, + "loss_iou": 0.1240234375, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 952396312, + "step": 9823 + }, + { + "epoch": 0.9605005866249511, + "grad_norm": 14.269084620230162, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 952493700, + "step": 9824 + }, + { + "epoch": 0.9605005866249511, + "loss": 0.0578756257891655, + "loss_ce": 0.0047140042297542095, + "loss_iou": 0.275390625, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 952493700, + "step": 9824 + }, + { + "epoch": 0.9605983574501369, + "grad_norm": 28.469351947497085, + "learning_rate": 5e-05, + "loss": 0.1127, + "num_input_tokens_seen": 952590288, + "step": 9825 + }, + { + "epoch": 0.9605983574501369, + "loss": 0.09354475140571594, + "loss_ce": 0.0033195256255567074, + "loss_iou": 0.283203125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 952590288, + "step": 9825 + }, + { + "epoch": 0.9606961282753227, + "grad_norm": 10.361675379480424, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 952686572, + "step": 9826 + }, + { + "epoch": 0.9606961282753227, + "loss": 0.06738294661045074, + "loss_ce": 0.008209360763430595, + "loss_iou": 0.1865234375, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 952686572, + "step": 9826 + }, + { + "epoch": 0.9607938991005084, + "grad_norm": 24.54461989143169, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 952783624, + "step": 9827 + }, + { + "epoch": 0.9607938991005084, + "loss": 0.09174840152263641, + "loss_ce": 0.006253402680158615, + "loss_iou": 0.255859375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 952783624, + "step": 9827 + }, + { + "epoch": 0.9608916699256942, + "grad_norm": 7.56466662606463, + "learning_rate": 5e-05, + "loss": 0.0916, + "num_input_tokens_seen": 952881004, + "step": 9828 + }, + { + "epoch": 0.9608916699256942, + "loss": 0.09980188310146332, + "loss_ce": 0.005243172403424978, + "loss_iou": 0.25, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 952881004, + "step": 9828 + }, + { + "epoch": 0.9609894407508799, + "grad_norm": 9.451977872255819, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 952978532, + "step": 9829 + }, + { + "epoch": 0.9609894407508799, + "loss": 0.09561541676521301, + "loss_ce": 0.004581483546644449, + "loss_iou": 0.328125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 952978532, + "step": 9829 + }, + { + "epoch": 0.9610872115760657, + "grad_norm": 11.069916131581184, + "learning_rate": 5e-05, + "loss": 0.0769, + "num_input_tokens_seen": 953074936, + "step": 9830 + }, + { + "epoch": 0.9610872115760657, + "loss": 0.06475197523832321, + "loss_ce": 0.004014367703348398, + "loss_iou": 0.140625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 953074936, + "step": 9830 + }, + { + "epoch": 0.9611849824012515, + "grad_norm": 8.662141504451377, + "learning_rate": 5e-05, + "loss": 0.0861, + "num_input_tokens_seen": 953172252, + "step": 9831 + }, + { + "epoch": 0.9611849824012515, + "loss": 0.0755816176533699, + "loss_ce": 0.004429884720593691, + "loss_iou": 0.291015625, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 953172252, + "step": 9831 + }, + { + "epoch": 0.9612827532264372, + "grad_norm": 2.4432940185390635, + "learning_rate": 5e-05, + "loss": 0.0689, + "num_input_tokens_seen": 953269496, + "step": 9832 + }, + { + "epoch": 0.9612827532264372, + "loss": 0.05992767587304115, + "loss_ce": 0.00624725641682744, + "loss_iou": 0.298828125, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 953269496, + "step": 9832 + }, + { + "epoch": 0.961380524051623, + "grad_norm": 4.857718013320228, + "learning_rate": 5e-05, + "loss": 0.0556, + "num_input_tokens_seen": 953366716, + "step": 9833 + }, + { + "epoch": 0.961380524051623, + "loss": 0.05567886680364609, + "loss_ce": 0.008826753124594688, + "loss_iou": 0.314453125, + "loss_num": 0.00933837890625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 953366716, + "step": 9833 + }, + { + "epoch": 0.9614782948768088, + "grad_norm": 6.748641332954289, + "learning_rate": 5e-05, + "loss": 0.048, + "num_input_tokens_seen": 953463612, + "step": 9834 + }, + { + "epoch": 0.9614782948768088, + "loss": 0.04253574460744858, + "loss_ce": 0.0013599030207842588, + "loss_iou": 0.283203125, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 953463612, + "step": 9834 + }, + { + "epoch": 0.9615760657019945, + "grad_norm": 11.381934619030515, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 953561264, + "step": 9835 + }, + { + "epoch": 0.9615760657019945, + "loss": 0.06872113049030304, + "loss_ce": 0.0016892715357244015, + "loss_iou": 0.296875, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 953561264, + "step": 9835 + }, + { + "epoch": 0.9616738365271803, + "grad_norm": 10.070251746838766, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 953657640, + "step": 9836 + }, + { + "epoch": 0.9616738365271803, + "loss": 0.09772427380084991, + "loss_ce": 0.009879427030682564, + "loss_iou": 0.2236328125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 953657640, + "step": 9836 + }, + { + "epoch": 0.9617716073523661, + "grad_norm": 9.527887704740333, + "learning_rate": 5e-05, + "loss": 0.0875, + "num_input_tokens_seen": 953755036, + "step": 9837 + }, + { + "epoch": 0.9617716073523661, + "loss": 0.09123092889785767, + "loss_ce": 0.006269988603889942, + "loss_iou": 0.33984375, + "loss_num": 0.01708984375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 953755036, + "step": 9837 + }, + { + "epoch": 0.9618693781775518, + "grad_norm": 9.587691032555865, + "learning_rate": 5e-05, + "loss": 0.0709, + "num_input_tokens_seen": 953853216, + "step": 9838 + }, + { + "epoch": 0.9618693781775518, + "loss": 0.08017440140247345, + "loss_ce": 0.004307697992771864, + "loss_iou": 0.3515625, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 953853216, + "step": 9838 + }, + { + "epoch": 0.9619671490027376, + "grad_norm": 9.67249923483289, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 953950948, + "step": 9839 + }, + { + "epoch": 0.9619671490027376, + "loss": 0.10716027021408081, + "loss_ce": 0.005445183254778385, + "loss_iou": 0.328125, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 953950948, + "step": 9839 + }, + { + "epoch": 0.9620649198279233, + "grad_norm": 9.124460876195487, + "learning_rate": 5e-05, + "loss": 0.0513, + "num_input_tokens_seen": 954047724, + "step": 9840 + }, + { + "epoch": 0.9620649198279233, + "loss": 0.0417400598526001, + "loss_ce": 0.00685084005817771, + "loss_iou": 0.263671875, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 954047724, + "step": 9840 + }, + { + "epoch": 0.9621626906531091, + "grad_norm": 3.459201322816965, + "learning_rate": 5e-05, + "loss": 0.0512, + "num_input_tokens_seen": 954145152, + "step": 9841 + }, + { + "epoch": 0.9621626906531091, + "loss": 0.04609762504696846, + "loss_ce": 0.0021599417086690664, + "loss_iou": 0.306640625, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 954145152, + "step": 9841 + }, + { + "epoch": 0.9622604614782949, + "grad_norm": 12.68554128504992, + "learning_rate": 5e-05, + "loss": 0.0928, + "num_input_tokens_seen": 954242264, + "step": 9842 + }, + { + "epoch": 0.9622604614782949, + "loss": 0.08351577073335648, + "loss_ce": 0.005077967420220375, + "loss_iou": 0.345703125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 954242264, + "step": 9842 + }, + { + "epoch": 0.9623582323034806, + "grad_norm": 4.839468288647791, + "learning_rate": 5e-05, + "loss": 0.0986, + "num_input_tokens_seen": 954339188, + "step": 9843 + }, + { + "epoch": 0.9623582323034806, + "loss": 0.08030307292938232, + "loss_ce": 0.0038717975839972496, + "loss_iou": 0.287109375, + "loss_num": 0.0152587890625, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 954339188, + "step": 9843 + }, + { + "epoch": 0.9624560031286664, + "grad_norm": 9.6903477738588, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 954437020, + "step": 9844 + }, + { + "epoch": 0.9624560031286664, + "loss": 0.06448543071746826, + "loss_ce": 0.007371782790869474, + "loss_iou": 0.310546875, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 954437020, + "step": 9844 + }, + { + "epoch": 0.9625537739538522, + "grad_norm": 14.659909271655296, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 954533416, + "step": 9845 + }, + { + "epoch": 0.9625537739538522, + "loss": 0.07453113049268723, + "loss_ce": 0.007026249542832375, + "loss_iou": 0.33203125, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 954533416, + "step": 9845 + }, + { + "epoch": 0.9626515447790379, + "grad_norm": 6.421235888946392, + "learning_rate": 5e-05, + "loss": 0.0838, + "num_input_tokens_seen": 954629912, + "step": 9846 + }, + { + "epoch": 0.9626515447790379, + "loss": 0.09392741322517395, + "loss_ce": 0.0085087139159441, + "loss_iou": 0.298828125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 954629912, + "step": 9846 + }, + { + "epoch": 0.9627493156042237, + "grad_norm": 6.73762788949535, + "learning_rate": 5e-05, + "loss": 0.078, + "num_input_tokens_seen": 954727128, + "step": 9847 + }, + { + "epoch": 0.9627493156042237, + "loss": 0.10197219252586365, + "loss_ce": 0.003842912847176194, + "loss_iou": 0.251953125, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 954727128, + "step": 9847 + }, + { + "epoch": 0.9628470864294094, + "grad_norm": 73.34510180818894, + "learning_rate": 5e-05, + "loss": 0.0587, + "num_input_tokens_seen": 954824096, + "step": 9848 + }, + { + "epoch": 0.9628470864294094, + "loss": 0.0433199405670166, + "loss_ce": 0.006355525925755501, + "loss_iou": 0.2197265625, + "loss_num": 0.007415771484375, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 954824096, + "step": 9848 + }, + { + "epoch": 0.9629448572545952, + "grad_norm": 6.998560130778806, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 954920976, + "step": 9849 + }, + { + "epoch": 0.9629448572545952, + "loss": 0.07756124436855316, + "loss_ce": 0.0048225983045995235, + "loss_iou": 0.30859375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 954920976, + "step": 9849 + }, + { + "epoch": 0.963042628079781, + "grad_norm": 4.982019032370779, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 955018336, + "step": 9850 + }, + { + "epoch": 0.963042628079781, + "loss": 0.09860944747924805, + "loss_ce": 0.006553168408572674, + "loss_iou": 0.337890625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 955018336, + "step": 9850 + }, + { + "epoch": 0.9631403989049667, + "grad_norm": 5.035034554778014, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 955115032, + "step": 9851 + }, + { + "epoch": 0.9631403989049667, + "loss": 0.07113344967365265, + "loss_ce": 0.004814936313778162, + "loss_iou": 0.23046875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 955115032, + "step": 9851 + }, + { + "epoch": 0.9632381697301525, + "grad_norm": 2.5087133681362377, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 955211412, + "step": 9852 + }, + { + "epoch": 0.9632381697301525, + "loss": 0.08042252063751221, + "loss_ce": 0.006974340416491032, + "loss_iou": 0.25, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 955211412, + "step": 9852 + }, + { + "epoch": 0.9633359405553383, + "grad_norm": 2.4470358040695923, + "learning_rate": 5e-05, + "loss": 0.0913, + "num_input_tokens_seen": 955307512, + "step": 9853 + }, + { + "epoch": 0.9633359405553383, + "loss": 0.07985907793045044, + "loss_ce": 0.006563480477780104, + "loss_iou": 0.1767578125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 955307512, + "step": 9853 + }, + { + "epoch": 0.963433711380524, + "grad_norm": 7.073010243935094, + "learning_rate": 5e-05, + "loss": 0.0929, + "num_input_tokens_seen": 955404980, + "step": 9854 + }, + { + "epoch": 0.963433711380524, + "loss": 0.10558849573135376, + "loss_ce": 0.002500114031136036, + "loss_iou": 0.265625, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 955404980, + "step": 9854 + }, + { + "epoch": 0.9635314822057098, + "grad_norm": 4.165433181091249, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 955502124, + "step": 9855 + }, + { + "epoch": 0.9635314822057098, + "loss": 0.05058453977108002, + "loss_ce": 0.0037095388397574425, + "loss_iou": 0.271484375, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 955502124, + "step": 9855 + }, + { + "epoch": 0.9636292530308955, + "grad_norm": 3.5754079765677247, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 955599352, + "step": 9856 + }, + { + "epoch": 0.9636292530308955, + "loss": 0.09012989699840546, + "loss_ce": 0.008525892160832882, + "loss_iou": 0.19140625, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 955599352, + "step": 9856 + }, + { + "epoch": 0.9637270238560813, + "grad_norm": 7.735958514125413, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 955696088, + "step": 9857 + }, + { + "epoch": 0.9637270238560813, + "loss": 0.07363145053386688, + "loss_ce": 0.006660629063844681, + "loss_iou": 0.212890625, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 955696088, + "step": 9857 + }, + { + "epoch": 0.9638247946812671, + "grad_norm": 14.503594875141209, + "learning_rate": 5e-05, + "loss": 0.0949, + "num_input_tokens_seen": 955791932, + "step": 9858 + }, + { + "epoch": 0.9638247946812671, + "loss": 0.08411786705255508, + "loss_ce": 0.0060081263072788715, + "loss_iou": 0.208984375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 955791932, + "step": 9858 + }, + { + "epoch": 0.9639225655064528, + "grad_norm": 6.1264960616209825, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 955888956, + "step": 9859 + }, + { + "epoch": 0.9639225655064528, + "loss": 0.07813630253076553, + "loss_ce": 0.005458693020045757, + "loss_iou": 0.287109375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 955888956, + "step": 9859 + }, + { + "epoch": 0.9640203363316386, + "grad_norm": 6.804653201496501, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 955986556, + "step": 9860 + }, + { + "epoch": 0.9640203363316386, + "loss": 0.08431310951709747, + "loss_ce": 0.009514525532722473, + "loss_iou": 0.314453125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 955986556, + "step": 9860 + }, + { + "epoch": 0.9641181071568244, + "grad_norm": 15.378739066486974, + "learning_rate": 5e-05, + "loss": 0.0617, + "num_input_tokens_seen": 956083184, + "step": 9861 + }, + { + "epoch": 0.9641181071568244, + "loss": 0.06272664666175842, + "loss_ce": 0.0037514332216233015, + "loss_iou": 0.2109375, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 956083184, + "step": 9861 + }, + { + "epoch": 0.9642158779820101, + "grad_norm": 10.850489322757333, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 956180836, + "step": 9862 + }, + { + "epoch": 0.9642158779820101, + "loss": 0.09308397024869919, + "loss_ce": 0.006589521653950214, + "loss_iou": 0.3125, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 956180836, + "step": 9862 + }, + { + "epoch": 0.9643136488071959, + "grad_norm": 2.8476120050937386, + "learning_rate": 5e-05, + "loss": 0.0626, + "num_input_tokens_seen": 956277852, + "step": 9863 + }, + { + "epoch": 0.9643136488071959, + "loss": 0.06888304650783539, + "loss_ce": 0.006924731656908989, + "loss_iou": 0.2578125, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 956277852, + "step": 9863 + }, + { + "epoch": 0.9644114196323818, + "grad_norm": 11.77034077886529, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 956375736, + "step": 9864 + }, + { + "epoch": 0.9644114196323818, + "loss": 0.06512103974819183, + "loss_ce": 0.004757266957312822, + "loss_iou": 0.318359375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 956375736, + "step": 9864 + }, + { + "epoch": 0.9645091904575674, + "grad_norm": 10.640466976986627, + "learning_rate": 5e-05, + "loss": 0.0688, + "num_input_tokens_seen": 956472820, + "step": 9865 + }, + { + "epoch": 0.9645091904575674, + "loss": 0.05111534148454666, + "loss_ce": 0.0037520581390708685, + "loss_iou": 0.2578125, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 956472820, + "step": 9865 + }, + { + "epoch": 0.9646069612827533, + "grad_norm": 3.2880172742573714, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 956568744, + "step": 9866 + }, + { + "epoch": 0.9646069612827533, + "loss": 0.06627929210662842, + "loss_ce": 0.0062054479494690895, + "loss_iou": 0.291015625, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 956568744, + "step": 9866 + }, + { + "epoch": 0.964704732107939, + "grad_norm": 2.77315777798539, + "learning_rate": 5e-05, + "loss": 0.0547, + "num_input_tokens_seen": 956666392, + "step": 9867 + }, + { + "epoch": 0.964704732107939, + "loss": 0.06165175884962082, + "loss_ce": 0.00629287026822567, + "loss_iou": 0.302734375, + "loss_num": 0.01104736328125, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 956666392, + "step": 9867 + }, + { + "epoch": 0.9648025029331248, + "grad_norm": 3.347980086574337, + "learning_rate": 5e-05, + "loss": 0.1085, + "num_input_tokens_seen": 956762516, + "step": 9868 + }, + { + "epoch": 0.9648025029331248, + "loss": 0.11601768434047699, + "loss_ce": 0.004704809281975031, + "loss_iou": 0.373046875, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 956762516, + "step": 9868 + }, + { + "epoch": 0.9649002737583106, + "grad_norm": 3.0212042375409207, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 956859512, + "step": 9869 + }, + { + "epoch": 0.9649002737583106, + "loss": 0.07159200310707092, + "loss_ce": 0.00742879556491971, + "loss_iou": 0.267578125, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 956859512, + "step": 9869 + }, + { + "epoch": 0.9649980445834963, + "grad_norm": 13.358849266012346, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 956956516, + "step": 9870 + }, + { + "epoch": 0.9649980445834963, + "loss": 0.05978928506374359, + "loss_ce": 0.003987888805568218, + "loss_iou": 0.255859375, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 956956516, + "step": 9870 + }, + { + "epoch": 0.9650958154086821, + "grad_norm": 31.123847387181044, + "learning_rate": 5e-05, + "loss": 0.0584, + "num_input_tokens_seen": 957053456, + "step": 9871 + }, + { + "epoch": 0.9650958154086821, + "loss": 0.04533721134066582, + "loss_ce": 0.0019259564578533173, + "loss_iou": 0.333984375, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 957053456, + "step": 9871 + }, + { + "epoch": 0.9651935862338679, + "grad_norm": 17.155694934825735, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 957150348, + "step": 9872 + }, + { + "epoch": 0.9651935862338679, + "loss": 0.08381478488445282, + "loss_ce": 0.0029279396403580904, + "loss_iou": 0.314453125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 957150348, + "step": 9872 + }, + { + "epoch": 0.9652913570590536, + "grad_norm": 5.758618418309926, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 957245788, + "step": 9873 + }, + { + "epoch": 0.9652913570590536, + "loss": 0.03514597564935684, + "loss_ce": 0.006863808259367943, + "loss_iou": 0.2197265625, + "loss_num": 0.005645751953125, + "loss_xval": 0.0283203125, + "num_input_tokens_seen": 957245788, + "step": 9873 + }, + { + "epoch": 0.9653891278842394, + "grad_norm": 21.22473720409377, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 957342960, + "step": 9874 + }, + { + "epoch": 0.9653891278842394, + "loss": 0.04485958069562912, + "loss_ce": 0.005476644262671471, + "loss_iou": 0.298828125, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 957342960, + "step": 9874 + }, + { + "epoch": 0.9654868987094251, + "grad_norm": 25.63305188974067, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 957440272, + "step": 9875 + }, + { + "epoch": 0.9654868987094251, + "loss": 0.08471475541591644, + "loss_ce": 0.006193031556904316, + "loss_iou": 0.3515625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 957440272, + "step": 9875 + }, + { + "epoch": 0.9655846695346109, + "grad_norm": 3.866620078182863, + "learning_rate": 5e-05, + "loss": 0.0848, + "num_input_tokens_seen": 957536712, + "step": 9876 + }, + { + "epoch": 0.9655846695346109, + "loss": 0.09378691017627716, + "loss_ce": 0.008139323443174362, + "loss_iou": 0.318359375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 957536712, + "step": 9876 + }, + { + "epoch": 0.9656824403597967, + "grad_norm": 4.076418551181978, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 957632996, + "step": 9877 + }, + { + "epoch": 0.9656824403597967, + "loss": 0.03470870107412338, + "loss_ce": 0.005053248256444931, + "loss_iou": 0.2197265625, + "loss_num": 0.00592041015625, + "loss_xval": 0.0296630859375, + "num_input_tokens_seen": 957632996, + "step": 9877 + }, + { + "epoch": 0.9657802111849824, + "grad_norm": 12.236997193564251, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 957729864, + "step": 9878 + }, + { + "epoch": 0.9657802111849824, + "loss": 0.09492838382720947, + "loss_ce": 0.005405065603554249, + "loss_iou": 0.32421875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 957729864, + "step": 9878 + }, + { + "epoch": 0.9658779820101682, + "grad_norm": 2.731364877429608, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 957826548, + "step": 9879 + }, + { + "epoch": 0.9658779820101682, + "loss": 0.0456487312912941, + "loss_ce": 0.005846179556101561, + "loss_iou": 0.2578125, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 957826548, + "step": 9879 + }, + { + "epoch": 0.965975752835354, + "grad_norm": 3.286253023066255, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 957923544, + "step": 9880 + }, + { + "epoch": 0.965975752835354, + "loss": 0.14064881205558777, + "loss_ce": 0.007271731738001108, + "loss_iou": 0.240234375, + "loss_num": 0.026611328125, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 957923544, + "step": 9880 + }, + { + "epoch": 0.9660735236605397, + "grad_norm": 2.9179991425151264, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 958020508, + "step": 9881 + }, + { + "epoch": 0.9660735236605397, + "loss": 0.07676029205322266, + "loss_ce": 0.005257604643702507, + "loss_iou": 0.267578125, + "loss_num": 0.01434326171875, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 958020508, + "step": 9881 + }, + { + "epoch": 0.9661712944857255, + "grad_norm": 6.283715151008921, + "learning_rate": 5e-05, + "loss": 0.0821, + "num_input_tokens_seen": 958117880, + "step": 9882 + }, + { + "epoch": 0.9661712944857255, + "loss": 0.09789367020130157, + "loss_ce": 0.004601434338837862, + "loss_iou": 0.359375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 958117880, + "step": 9882 + }, + { + "epoch": 0.9662690653109113, + "grad_norm": 6.843344015196966, + "learning_rate": 5e-05, + "loss": 0.0531, + "num_input_tokens_seen": 958215000, + "step": 9883 + }, + { + "epoch": 0.9662690653109113, + "loss": 0.04605680704116821, + "loss_ce": 0.007242258638143539, + "loss_iou": 0.302734375, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 958215000, + "step": 9883 + }, + { + "epoch": 0.966366836136097, + "grad_norm": 2.848537467882137, + "learning_rate": 5e-05, + "loss": 0.1004, + "num_input_tokens_seen": 958312060, + "step": 9884 + }, + { + "epoch": 0.966366836136097, + "loss": 0.08454567193984985, + "loss_ce": 0.0065122339874506, + "loss_iou": 0.251953125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 958312060, + "step": 9884 + }, + { + "epoch": 0.9664646069612828, + "grad_norm": 18.99547258819358, + "learning_rate": 5e-05, + "loss": 0.0584, + "num_input_tokens_seen": 958409056, + "step": 9885 + }, + { + "epoch": 0.9664646069612828, + "loss": 0.05887889862060547, + "loss_ce": 0.006297112908214331, + "loss_iou": 0.251953125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 958409056, + "step": 9885 + }, + { + "epoch": 0.9665623777864685, + "grad_norm": 8.862136571199374, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 958506932, + "step": 9886 + }, + { + "epoch": 0.9665623777864685, + "loss": 0.0623072050511837, + "loss_ce": 0.004522171337157488, + "loss_iou": 0.330078125, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 958506932, + "step": 9886 + }, + { + "epoch": 0.9666601486116543, + "grad_norm": 9.197423780976848, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 958604604, + "step": 9887 + }, + { + "epoch": 0.9666601486116543, + "loss": 0.0866066962480545, + "loss_ce": 0.005963991861790419, + "loss_iou": 0.27734375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 958604604, + "step": 9887 + }, + { + "epoch": 0.9667579194368401, + "grad_norm": 13.580086908266056, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 958701596, + "step": 9888 + }, + { + "epoch": 0.9667579194368401, + "loss": 0.08938455581665039, + "loss_ce": 0.008368022739887238, + "loss_iou": 0.28125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 958701596, + "step": 9888 + }, + { + "epoch": 0.9668556902620258, + "grad_norm": 16.61190671805828, + "learning_rate": 5e-05, + "loss": 0.1034, + "num_input_tokens_seen": 958798340, + "step": 9889 + }, + { + "epoch": 0.9668556902620258, + "loss": 0.11946463584899902, + "loss_ce": 0.008487455546855927, + "loss_iou": 0.2314453125, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 958798340, + "step": 9889 + }, + { + "epoch": 0.9669534610872116, + "grad_norm": 7.669973929579996, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 958895716, + "step": 9890 + }, + { + "epoch": 0.9669534610872116, + "loss": 0.06723244488239288, + "loss_ce": 0.007833793759346008, + "loss_iou": 0.27734375, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 958895716, + "step": 9890 + }, + { + "epoch": 0.9670512319123974, + "grad_norm": 22.485212076977113, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 958992364, + "step": 9891 + }, + { + "epoch": 0.9670512319123974, + "loss": 0.03581772744655609, + "loss_ce": 0.006978616118431091, + "loss_iou": 0.1875, + "loss_num": 0.005767822265625, + "loss_xval": 0.02880859375, + "num_input_tokens_seen": 958992364, + "step": 9891 + }, + { + "epoch": 0.9671490027375831, + "grad_norm": 8.905125821973746, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 959088832, + "step": 9892 + }, + { + "epoch": 0.9671490027375831, + "loss": 0.05616489797830582, + "loss_ce": 0.005032699089497328, + "loss_iou": 0.2021484375, + "loss_num": 0.01025390625, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 959088832, + "step": 9892 + }, + { + "epoch": 0.9672467735627689, + "grad_norm": 7.526322944682277, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 959184744, + "step": 9893 + }, + { + "epoch": 0.9672467735627689, + "loss": 0.06072705239057541, + "loss_ce": 0.0033311168663203716, + "loss_iou": 0.1474609375, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 959184744, + "step": 9893 + }, + { + "epoch": 0.9673445443879546, + "grad_norm": 3.9802877860865924, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 959281336, + "step": 9894 + }, + { + "epoch": 0.9673445443879546, + "loss": 0.09396316111087799, + "loss_ce": 0.00979568436741829, + "loss_iou": 0.27734375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 959281336, + "step": 9894 + }, + { + "epoch": 0.9674423152131404, + "grad_norm": 17.29115697207182, + "learning_rate": 5e-05, + "loss": 0.0512, + "num_input_tokens_seen": 959377280, + "step": 9895 + }, + { + "epoch": 0.9674423152131404, + "loss": 0.05543513596057892, + "loss_ce": 0.005310013424605131, + "loss_iou": 0.1982421875, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 959377280, + "step": 9895 + }, + { + "epoch": 0.9675400860383262, + "grad_norm": 11.921647455997983, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 959475176, + "step": 9896 + }, + { + "epoch": 0.9675400860383262, + "loss": 0.06873415410518646, + "loss_ce": 0.003121362067759037, + "loss_iou": 0.296875, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 959475176, + "step": 9896 + }, + { + "epoch": 0.9676378568635119, + "grad_norm": 4.781734262058369, + "learning_rate": 5e-05, + "loss": 0.0516, + "num_input_tokens_seen": 959572308, + "step": 9897 + }, + { + "epoch": 0.9676378568635119, + "loss": 0.04629142954945564, + "loss_ce": 0.006343919783830643, + "loss_iou": 0.2177734375, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 959572308, + "step": 9897 + }, + { + "epoch": 0.9677356276886977, + "grad_norm": 5.335944578472822, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 959669200, + "step": 9898 + }, + { + "epoch": 0.9677356276886977, + "loss": 0.04840972274541855, + "loss_ce": 0.005654596723616123, + "loss_iou": 0.29296875, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 959669200, + "step": 9898 + }, + { + "epoch": 0.9678333985138835, + "grad_norm": 3.2517333771781876, + "learning_rate": 5e-05, + "loss": 0.0573, + "num_input_tokens_seen": 959765824, + "step": 9899 + }, + { + "epoch": 0.9678333985138835, + "loss": 0.05009838938713074, + "loss_ce": 0.0009140667389146984, + "loss_iou": 0.298828125, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 959765824, + "step": 9899 + }, + { + "epoch": 0.9679311693390692, + "grad_norm": 28.477649976221144, + "learning_rate": 5e-05, + "loss": 0.1247, + "num_input_tokens_seen": 959862900, + "step": 9900 + }, + { + "epoch": 0.9679311693390692, + "loss": 0.10189719498157501, + "loss_ce": 0.006583179347217083, + "loss_iou": 0.2294921875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 959862900, + "step": 9900 + }, + { + "epoch": 0.968028940164255, + "grad_norm": 36.26408692211156, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 959959284, + "step": 9901 + }, + { + "epoch": 0.968028940164255, + "loss": 0.07686979323625565, + "loss_ce": 0.0030325152911245823, + "loss_iou": 0.357421875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 959959284, + "step": 9901 + }, + { + "epoch": 0.9681267109894407, + "grad_norm": 8.901690397653665, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 960057396, + "step": 9902 + }, + { + "epoch": 0.9681267109894407, + "loss": 0.09408695995807648, + "loss_ce": 0.002991987159475684, + "loss_iou": 0.392578125, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 960057396, + "step": 9902 + }, + { + "epoch": 0.9682244818146265, + "grad_norm": 4.801478312300532, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 960154108, + "step": 9903 + }, + { + "epoch": 0.9682244818146265, + "loss": 0.08015850186347961, + "loss_ce": 0.007648732978850603, + "loss_iou": 0.32421875, + "loss_num": 0.0145263671875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 960154108, + "step": 9903 + }, + { + "epoch": 0.9683222526398123, + "grad_norm": 6.530357418647068, + "learning_rate": 5e-05, + "loss": 0.0708, + "num_input_tokens_seen": 960251884, + "step": 9904 + }, + { + "epoch": 0.9683222526398123, + "loss": 0.07059335708618164, + "loss_ce": 0.003382209688425064, + "loss_iou": 0.310546875, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 960251884, + "step": 9904 + }, + { + "epoch": 0.968420023464998, + "grad_norm": 13.069042894798661, + "learning_rate": 5e-05, + "loss": 0.0546, + "num_input_tokens_seen": 960348048, + "step": 9905 + }, + { + "epoch": 0.968420023464998, + "loss": 0.035725779831409454, + "loss_ce": 0.004269787576049566, + "loss_iou": 0.1748046875, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 960348048, + "step": 9905 + }, + { + "epoch": 0.9685177942901838, + "grad_norm": 7.845348419759105, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 960444592, + "step": 9906 + }, + { + "epoch": 0.9685177942901838, + "loss": 0.11216948181390762, + "loss_ce": 0.011293631047010422, + "loss_iou": 0.271484375, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 960444592, + "step": 9906 + }, + { + "epoch": 0.9686155651153696, + "grad_norm": 4.691480408644346, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 960542108, + "step": 9907 + }, + { + "epoch": 0.9686155651153696, + "loss": 0.07507063448429108, + "loss_ce": 0.006284010596573353, + "loss_iou": 0.271484375, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 960542108, + "step": 9907 + }, + { + "epoch": 0.9687133359405553, + "grad_norm": 13.343431889901638, + "learning_rate": 5e-05, + "loss": 0.0775, + "num_input_tokens_seen": 960639364, + "step": 9908 + }, + { + "epoch": 0.9687133359405553, + "loss": 0.07843334972858429, + "loss_ce": 0.008296323008835316, + "loss_iou": 0.306640625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 960639364, + "step": 9908 + }, + { + "epoch": 0.9688111067657411, + "grad_norm": 17.704355156835444, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 960735920, + "step": 9909 + }, + { + "epoch": 0.9688111067657411, + "loss": 0.05604159086942673, + "loss_ce": 0.00231539411470294, + "loss_iou": 0.24609375, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 960735920, + "step": 9909 + }, + { + "epoch": 0.9689088775909269, + "grad_norm": 5.037860449410196, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 960833416, + "step": 9910 + }, + { + "epoch": 0.9689088775909269, + "loss": 0.072355255484581, + "loss_ce": 0.003171902149915695, + "loss_iou": 0.26953125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 960833416, + "step": 9910 + }, + { + "epoch": 0.9690066484161126, + "grad_norm": 13.37556249734285, + "learning_rate": 5e-05, + "loss": 0.0581, + "num_input_tokens_seen": 960930656, + "step": 9911 + }, + { + "epoch": 0.9690066484161126, + "loss": 0.07190835475921631, + "loss_ce": 0.005380030255764723, + "loss_iou": 0.2412109375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 960930656, + "step": 9911 + }, + { + "epoch": 0.9691044192412984, + "grad_norm": 7.601618507874437, + "learning_rate": 5e-05, + "loss": 0.0531, + "num_input_tokens_seen": 961028104, + "step": 9912 + }, + { + "epoch": 0.9691044192412984, + "loss": 0.03928777948021889, + "loss_ce": 0.005497191101312637, + "loss_iou": 0.328125, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 961028104, + "step": 9912 + }, + { + "epoch": 0.9692021900664841, + "grad_norm": 15.552346174921379, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 961125124, + "step": 9913 + }, + { + "epoch": 0.9692021900664841, + "loss": 0.0880063846707344, + "loss_ce": 0.004105932544916868, + "loss_iou": 0.2373046875, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 961125124, + "step": 9913 + }, + { + "epoch": 0.9692999608916699, + "grad_norm": 15.014147552328945, + "learning_rate": 5e-05, + "loss": 0.0615, + "num_input_tokens_seen": 961222628, + "step": 9914 + }, + { + "epoch": 0.9692999608916699, + "loss": 0.06987132132053375, + "loss_ce": 0.004807843826711178, + "loss_iou": 0.287109375, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 961222628, + "step": 9914 + }, + { + "epoch": 0.9693977317168557, + "grad_norm": 19.205273884818375, + "learning_rate": 5e-05, + "loss": 0.0705, + "num_input_tokens_seen": 961320804, + "step": 9915 + }, + { + "epoch": 0.9693977317168557, + "loss": 0.07219470292329788, + "loss_ce": 0.002111088950186968, + "loss_iou": 0.1396484375, + "loss_num": 0.01397705078125, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 961320804, + "step": 9915 + }, + { + "epoch": 0.9694955025420414, + "grad_norm": 14.707869934331212, + "learning_rate": 5e-05, + "loss": 0.105, + "num_input_tokens_seen": 961417852, + "step": 9916 + }, + { + "epoch": 0.9694955025420414, + "loss": 0.11972448229789734, + "loss_ce": 0.006489011459052563, + "loss_iou": 0.21875, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 961417852, + "step": 9916 + }, + { + "epoch": 0.9695932733672272, + "grad_norm": 4.952638598352057, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 961515508, + "step": 9917 + }, + { + "epoch": 0.9695932733672272, + "loss": 0.07072240114212036, + "loss_ce": 0.003446399699896574, + "loss_iou": 0.314453125, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 961515508, + "step": 9917 + }, + { + "epoch": 0.969691044192413, + "grad_norm": 7.865793768233114, + "learning_rate": 5e-05, + "loss": 0.0991, + "num_input_tokens_seen": 961611800, + "step": 9918 + }, + { + "epoch": 0.969691044192413, + "loss": 0.09587827324867249, + "loss_ce": 0.006110819056630135, + "loss_iou": 0.2490234375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 961611800, + "step": 9918 + }, + { + "epoch": 0.9697888150175987, + "grad_norm": 4.268615080869936, + "learning_rate": 5e-05, + "loss": 0.083, + "num_input_tokens_seen": 961708632, + "step": 9919 + }, + { + "epoch": 0.9697888150175987, + "loss": 0.07674194872379303, + "loss_ce": 0.01026703417301178, + "loss_iou": 0.212890625, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 961708632, + "step": 9919 + }, + { + "epoch": 0.9698865858427845, + "grad_norm": 29.11130328688427, + "learning_rate": 5e-05, + "loss": 0.0626, + "num_input_tokens_seen": 961804620, + "step": 9920 + }, + { + "epoch": 0.9698865858427845, + "loss": 0.0632169097661972, + "loss_ce": 0.006499988958239555, + "loss_iou": 0.2314453125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 961804620, + "step": 9920 + }, + { + "epoch": 0.9699843566679702, + "grad_norm": 6.195247758273786, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 961901516, + "step": 9921 + }, + { + "epoch": 0.9699843566679702, + "loss": 0.0535363107919693, + "loss_ce": 0.0032128221355378628, + "loss_iou": 0.259765625, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 961901516, + "step": 9921 + }, + { + "epoch": 0.970082127493156, + "grad_norm": 32.88989488793923, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 961998512, + "step": 9922 + }, + { + "epoch": 0.970082127493156, + "loss": 0.06635977327823639, + "loss_ce": 0.0036843009293079376, + "loss_iou": 0.25390625, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 961998512, + "step": 9922 + }, + { + "epoch": 0.9701798983183418, + "grad_norm": 9.953451722027955, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 962094888, + "step": 9923 + }, + { + "epoch": 0.9701798983183418, + "loss": 0.08196987956762314, + "loss_ce": 0.007760659791529179, + "loss_iou": 0.255859375, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 962094888, + "step": 9923 + }, + { + "epoch": 0.9702776691435275, + "grad_norm": 6.308025500852895, + "learning_rate": 5e-05, + "loss": 0.0644, + "num_input_tokens_seen": 962190992, + "step": 9924 + }, + { + "epoch": 0.9702776691435275, + "loss": 0.0690479725599289, + "loss_ce": 0.0056095547042787075, + "loss_iou": 0.28515625, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 962190992, + "step": 9924 + }, + { + "epoch": 0.9703754399687133, + "grad_norm": 7.480970899119483, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 962287756, + "step": 9925 + }, + { + "epoch": 0.9703754399687133, + "loss": 0.08161364495754242, + "loss_ce": 0.004968745168298483, + "loss_iou": 0.17578125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 962287756, + "step": 9925 + }, + { + "epoch": 0.9704732107938991, + "grad_norm": 7.493957551895637, + "learning_rate": 5e-05, + "loss": 0.0506, + "num_input_tokens_seen": 962385640, + "step": 9926 + }, + { + "epoch": 0.9704732107938991, + "loss": 0.04484611004590988, + "loss_ce": 0.005386883392930031, + "loss_iou": 0.326171875, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 962385640, + "step": 9926 + }, + { + "epoch": 0.9705709816190848, + "grad_norm": 11.108742273249836, + "learning_rate": 5e-05, + "loss": 0.0528, + "num_input_tokens_seen": 962481884, + "step": 9927 + }, + { + "epoch": 0.9705709816190848, + "loss": 0.046883177012205124, + "loss_ce": 0.005493712145835161, + "loss_iou": 0.25390625, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 962481884, + "step": 9927 + }, + { + "epoch": 0.9706687524442706, + "grad_norm": 4.669451637712224, + "learning_rate": 5e-05, + "loss": 0.0473, + "num_input_tokens_seen": 962577812, + "step": 9928 + }, + { + "epoch": 0.9706687524442706, + "loss": 0.03940379619598389, + "loss_ce": 0.005147813819348812, + "loss_iou": 0.20703125, + "loss_num": 0.006866455078125, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 962577812, + "step": 9928 + }, + { + "epoch": 0.9707665232694564, + "grad_norm": 22.261341257228924, + "learning_rate": 5e-05, + "loss": 0.099, + "num_input_tokens_seen": 962674936, + "step": 9929 + }, + { + "epoch": 0.9707665232694564, + "loss": 0.07407650351524353, + "loss_ce": 0.003756372956559062, + "loss_iou": 0.1796875, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 962674936, + "step": 9929 + }, + { + "epoch": 0.9708642940946421, + "grad_norm": 14.525072175454214, + "learning_rate": 5e-05, + "loss": 0.0471, + "num_input_tokens_seen": 962772236, + "step": 9930 + }, + { + "epoch": 0.9708642940946421, + "loss": 0.05194630101323128, + "loss_ce": 0.0028282594867050648, + "loss_iou": 0.34765625, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 962772236, + "step": 9930 + }, + { + "epoch": 0.970962064919828, + "grad_norm": 8.677443805380053, + "learning_rate": 5e-05, + "loss": 0.1004, + "num_input_tokens_seen": 962868808, + "step": 9931 + }, + { + "epoch": 0.970962064919828, + "loss": 0.1041402518749237, + "loss_ce": 0.00512596545740962, + "loss_iou": 0.3671875, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 962868808, + "step": 9931 + }, + { + "epoch": 0.9710598357450136, + "grad_norm": 3.2957589372892144, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 962966428, + "step": 9932 + }, + { + "epoch": 0.9710598357450136, + "loss": 0.06786532700061798, + "loss_ce": 0.003778406884521246, + "loss_iou": 0.2265625, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 962966428, + "step": 9932 + }, + { + "epoch": 0.9711576065701994, + "grad_norm": 7.115620477007256, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 963062768, + "step": 9933 + }, + { + "epoch": 0.9711576065701994, + "loss": 0.061166923493146896, + "loss_ce": 0.004007499665021896, + "loss_iou": 0.1953125, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 963062768, + "step": 9933 + }, + { + "epoch": 0.9712553773953853, + "grad_norm": 3.859773683469203, + "learning_rate": 5e-05, + "loss": 0.0856, + "num_input_tokens_seen": 963159124, + "step": 9934 + }, + { + "epoch": 0.9712553773953853, + "loss": 0.04254165291786194, + "loss_ce": 0.002174527384340763, + "loss_iou": 0.28515625, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 963159124, + "step": 9934 + }, + { + "epoch": 0.971353148220571, + "grad_norm": 14.7265476241771, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 963256608, + "step": 9935 + }, + { + "epoch": 0.971353148220571, + "loss": 0.08203145861625671, + "loss_ce": 0.005432335194200277, + "loss_iou": 0.357421875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 963256608, + "step": 9935 + }, + { + "epoch": 0.9714509190457568, + "grad_norm": 5.707661723485512, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 963353920, + "step": 9936 + }, + { + "epoch": 0.9714509190457568, + "loss": 0.05627467855811119, + "loss_ce": 0.004432942718267441, + "loss_iou": 0.41015625, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 963353920, + "step": 9936 + }, + { + "epoch": 0.9715486898709426, + "grad_norm": 10.762891313505502, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 963451100, + "step": 9937 + }, + { + "epoch": 0.9715486898709426, + "loss": 0.08607786893844604, + "loss_ce": 0.00747984042391181, + "loss_iou": 0.326171875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 963451100, + "step": 9937 + }, + { + "epoch": 0.9716464606961283, + "grad_norm": 14.727729059651827, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 963548416, + "step": 9938 + }, + { + "epoch": 0.9716464606961283, + "loss": 0.0822431668639183, + "loss_ce": 0.006117070093750954, + "loss_iou": 0.236328125, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 963548416, + "step": 9938 + }, + { + "epoch": 0.9717442315213141, + "grad_norm": 9.841954078697313, + "learning_rate": 5e-05, + "loss": 0.0772, + "num_input_tokens_seen": 963646104, + "step": 9939 + }, + { + "epoch": 0.9717442315213141, + "loss": 0.0816197469830513, + "loss_ce": 0.006378656253218651, + "loss_iou": 0.369140625, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 963646104, + "step": 9939 + }, + { + "epoch": 0.9718420023464998, + "grad_norm": 10.630589830005773, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 963742116, + "step": 9940 + }, + { + "epoch": 0.9718420023464998, + "loss": 0.06356905400753021, + "loss_ce": 0.0065011875703930855, + "loss_iou": 0.140625, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 963742116, + "step": 9940 + }, + { + "epoch": 0.9719397731716856, + "grad_norm": 13.835572834119674, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 963838964, + "step": 9941 + }, + { + "epoch": 0.9719397731716856, + "loss": 0.06465720385313034, + "loss_ce": 0.00478171743452549, + "loss_iou": 0.294921875, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 963838964, + "step": 9941 + }, + { + "epoch": 0.9720375439968714, + "grad_norm": 3.128706290673785, + "learning_rate": 5e-05, + "loss": 0.0451, + "num_input_tokens_seen": 963936280, + "step": 9942 + }, + { + "epoch": 0.9720375439968714, + "loss": 0.05776607245206833, + "loss_ce": 0.004146686289459467, + "loss_iou": 0.345703125, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 963936280, + "step": 9942 + }, + { + "epoch": 0.9721353148220571, + "grad_norm": 7.01228245890316, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 964032396, + "step": 9943 + }, + { + "epoch": 0.9721353148220571, + "loss": 0.0686379224061966, + "loss_ce": 0.0056954193860292435, + "loss_iou": 0.3125, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 964032396, + "step": 9943 + }, + { + "epoch": 0.9722330856472429, + "grad_norm": 5.103719974911791, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 964129100, + "step": 9944 + }, + { + "epoch": 0.9722330856472429, + "loss": 0.05711719021201134, + "loss_ce": 0.005477633792907, + "loss_iou": 0.234375, + "loss_num": 0.01031494140625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 964129100, + "step": 9944 + }, + { + "epoch": 0.9723308564724287, + "grad_norm": 3.6398584432721215, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 964226068, + "step": 9945 + }, + { + "epoch": 0.9723308564724287, + "loss": 0.08010441809892654, + "loss_ce": 0.007892198860645294, + "loss_iou": 0.29296875, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 964226068, + "step": 9945 + }, + { + "epoch": 0.9724286272976144, + "grad_norm": 17.41181684270936, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 964323468, + "step": 9946 + }, + { + "epoch": 0.9724286272976144, + "loss": 0.07690799236297607, + "loss_ce": 0.005329019855707884, + "loss_iou": 0.34375, + "loss_num": 0.0142822265625, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 964323468, + "step": 9946 + }, + { + "epoch": 0.9725263981228002, + "grad_norm": 9.834011865474455, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 964420276, + "step": 9947 + }, + { + "epoch": 0.9725263981228002, + "loss": 0.06963430345058441, + "loss_ce": 0.005715240724384785, + "loss_iou": 0.251953125, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 964420276, + "step": 9947 + }, + { + "epoch": 0.9726241689479859, + "grad_norm": 5.264700394240399, + "learning_rate": 5e-05, + "loss": 0.0843, + "num_input_tokens_seen": 964517236, + "step": 9948 + }, + { + "epoch": 0.9726241689479859, + "loss": 0.09541173279285431, + "loss_ce": 0.007772877346724272, + "loss_iou": 0.3046875, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 964517236, + "step": 9948 + }, + { + "epoch": 0.9727219397731717, + "grad_norm": 3.5375428947904877, + "learning_rate": 5e-05, + "loss": 0.062, + "num_input_tokens_seen": 964614064, + "step": 9949 + }, + { + "epoch": 0.9727219397731717, + "loss": 0.06369076669216156, + "loss_ce": 0.0025716880336403847, + "loss_iou": 0.322265625, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 964614064, + "step": 9949 + }, + { + "epoch": 0.9728197105983575, + "grad_norm": 3.0443195161722536, + "learning_rate": 5e-05, + "loss": 0.0705, + "num_input_tokens_seen": 964710632, + "step": 9950 + }, + { + "epoch": 0.9728197105983575, + "loss": 0.052075400948524475, + "loss_ce": 0.004025470465421677, + "loss_iou": 0.251953125, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 964710632, + "step": 9950 + }, + { + "epoch": 0.9729174814235432, + "grad_norm": 4.193787417473951, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 964807572, + "step": 9951 + }, + { + "epoch": 0.9729174814235432, + "loss": 0.08170750737190247, + "loss_ce": 0.008091484196484089, + "loss_iou": 0.23828125, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 964807572, + "step": 9951 + }, + { + "epoch": 0.973015252248729, + "grad_norm": 9.075711876470253, + "learning_rate": 5e-05, + "loss": 0.099, + "num_input_tokens_seen": 964903776, + "step": 9952 + }, + { + "epoch": 0.973015252248729, + "loss": 0.10525642335414886, + "loss_ce": 0.00653205206617713, + "loss_iou": 0.306640625, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 964903776, + "step": 9952 + }, + { + "epoch": 0.9731130230739148, + "grad_norm": 3.510195917527021, + "learning_rate": 5e-05, + "loss": 0.0848, + "num_input_tokens_seen": 965000732, + "step": 9953 + }, + { + "epoch": 0.9731130230739148, + "loss": 0.05416019260883331, + "loss_ce": 0.0046911947429180145, + "loss_iou": 0.25390625, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 965000732, + "step": 9953 + }, + { + "epoch": 0.9732107938991005, + "grad_norm": 13.00934198500938, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 965097276, + "step": 9954 + }, + { + "epoch": 0.9732107938991005, + "loss": 0.13669925928115845, + "loss_ce": 0.00634343083947897, + "loss_iou": 0.15625, + "loss_num": 0.026123046875, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 965097276, + "step": 9954 + }, + { + "epoch": 0.9733085647242863, + "grad_norm": 22.764266110280094, + "learning_rate": 5e-05, + "loss": 0.0677, + "num_input_tokens_seen": 965194148, + "step": 9955 + }, + { + "epoch": 0.9733085647242863, + "loss": 0.09134788066148758, + "loss_ce": 0.00827140174806118, + "loss_iou": 0.287109375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 965194148, + "step": 9955 + }, + { + "epoch": 0.9734063355494721, + "grad_norm": 19.715732763073813, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 965291164, + "step": 9956 + }, + { + "epoch": 0.9734063355494721, + "loss": 0.061122454702854156, + "loss_ce": 0.003093278268352151, + "loss_iou": 0.310546875, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 965291164, + "step": 9956 + }, + { + "epoch": 0.9735041063746578, + "grad_norm": 9.041006135987281, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 965388416, + "step": 9957 + }, + { + "epoch": 0.9735041063746578, + "loss": 0.05595209077000618, + "loss_ce": 0.00386621686629951, + "loss_iou": 0.31640625, + "loss_num": 0.01043701171875, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 965388416, + "step": 9957 + }, + { + "epoch": 0.9736018771998436, + "grad_norm": 8.530005449704964, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 965485540, + "step": 9958 + }, + { + "epoch": 0.9736018771998436, + "loss": 0.0840374156832695, + "loss_ce": 0.0127483569085598, + "loss_iou": 0.326171875, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 965485540, + "step": 9958 + }, + { + "epoch": 0.9736996480250293, + "grad_norm": 6.9981527881891425, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 965582336, + "step": 9959 + }, + { + "epoch": 0.9736996480250293, + "loss": 0.0686081200838089, + "loss_ce": 0.005299399606883526, + "loss_iou": 0.365234375, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 965582336, + "step": 9959 + }, + { + "epoch": 0.9737974188502151, + "grad_norm": 11.499782167876718, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 965680220, + "step": 9960 + }, + { + "epoch": 0.9737974188502151, + "loss": 0.08358407020568848, + "loss_ce": 0.0056421756744384766, + "loss_iou": 0.3203125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 965680220, + "step": 9960 + }, + { + "epoch": 0.9738951896754009, + "grad_norm": 9.676944656116959, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 965777380, + "step": 9961 + }, + { + "epoch": 0.9738951896754009, + "loss": 0.05852534994482994, + "loss_ce": 0.0038225925527513027, + "loss_iou": 0.1748046875, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 965777380, + "step": 9961 + }, + { + "epoch": 0.9739929605005866, + "grad_norm": 7.848261209376436, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 965874192, + "step": 9962 + }, + { + "epoch": 0.9739929605005866, + "loss": 0.07574422657489777, + "loss_ce": 0.005515652243047953, + "loss_iou": 0.2734375, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 965874192, + "step": 9962 + }, + { + "epoch": 0.9740907313257724, + "grad_norm": 2.9454644436236657, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 965969912, + "step": 9963 + }, + { + "epoch": 0.9740907313257724, + "loss": 0.06473959237337112, + "loss_ce": 0.005760556552559137, + "loss_iou": 0.232421875, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 965969912, + "step": 9963 + }, + { + "epoch": 0.9741885021509582, + "grad_norm": 18.479194164609595, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 966066368, + "step": 9964 + }, + { + "epoch": 0.9741885021509582, + "loss": 0.07413376867771149, + "loss_ce": 0.004233249928802252, + "loss_iou": 0.255859375, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 966066368, + "step": 9964 + }, + { + "epoch": 0.9742862729761439, + "grad_norm": 16.77280979838453, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 966163536, + "step": 9965 + }, + { + "epoch": 0.9742862729761439, + "loss": 0.0550372414290905, + "loss_ce": 0.005114298313856125, + "loss_iou": 0.283203125, + "loss_num": 0.010009765625, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 966163536, + "step": 9965 + }, + { + "epoch": 0.9743840438013297, + "grad_norm": 18.465182948459624, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 966260576, + "step": 9966 + }, + { + "epoch": 0.9743840438013297, + "loss": 0.0747140571475029, + "loss_ce": 0.005515448283404112, + "loss_iou": 0.267578125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 966260576, + "step": 9966 + }, + { + "epoch": 0.9744818146265154, + "grad_norm": 7.944889494620783, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 966356608, + "step": 9967 + }, + { + "epoch": 0.9744818146265154, + "loss": 0.06291737407445908, + "loss_ce": 0.003759048180654645, + "loss_iou": 0.28515625, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 966356608, + "step": 9967 + }, + { + "epoch": 0.9745795854517012, + "grad_norm": 14.810192763771987, + "learning_rate": 5e-05, + "loss": 0.0594, + "num_input_tokens_seen": 966453748, + "step": 9968 + }, + { + "epoch": 0.9745795854517012, + "loss": 0.07835070043802261, + "loss_ce": 0.006031674332916737, + "loss_iou": 0.337890625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 966453748, + "step": 9968 + }, + { + "epoch": 0.974677356276887, + "grad_norm": 9.044581025600037, + "learning_rate": 5e-05, + "loss": 0.0564, + "num_input_tokens_seen": 966550184, + "step": 9969 + }, + { + "epoch": 0.974677356276887, + "loss": 0.041887328028678894, + "loss_ce": 0.002718015108257532, + "loss_iou": 0.296875, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 966550184, + "step": 9969 + }, + { + "epoch": 0.9747751271020727, + "grad_norm": 13.838465362882998, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 966646848, + "step": 9970 + }, + { + "epoch": 0.9747751271020727, + "loss": 0.04809340834617615, + "loss_ce": 0.0038429151754826307, + "loss_iou": 0.25, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 966646848, + "step": 9970 + }, + { + "epoch": 0.9748728979272585, + "grad_norm": 4.158856646535795, + "learning_rate": 5e-05, + "loss": 0.0682, + "num_input_tokens_seen": 966743844, + "step": 9971 + }, + { + "epoch": 0.9748728979272585, + "loss": 0.06790584325790405, + "loss_ce": 0.007420002482831478, + "loss_iou": 0.2041015625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 966743844, + "step": 9971 + }, + { + "epoch": 0.9749706687524443, + "grad_norm": 7.656663948898668, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 966840936, + "step": 9972 + }, + { + "epoch": 0.9749706687524443, + "loss": 0.058851391077041626, + "loss_ce": 0.006044535432010889, + "loss_iou": 0.28125, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 966840936, + "step": 9972 + }, + { + "epoch": 0.97506843957763, + "grad_norm": 3.862233817878046, + "learning_rate": 5e-05, + "loss": 0.0647, + "num_input_tokens_seen": 966937748, + "step": 9973 + }, + { + "epoch": 0.97506843957763, + "loss": 0.06160229444503784, + "loss_ce": 0.0033518674317747355, + "loss_iou": 0.32421875, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 966937748, + "step": 9973 + }, + { + "epoch": 0.9751662104028158, + "grad_norm": 2.854064408422527, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 967034384, + "step": 9974 + }, + { + "epoch": 0.9751662104028158, + "loss": 0.034753650426864624, + "loss_ce": 0.0014589696656912565, + "loss_iou": 0.1845703125, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 967034384, + "step": 9974 + }, + { + "epoch": 0.9752639812280016, + "grad_norm": 7.7676595553814245, + "learning_rate": 5e-05, + "loss": 0.0922, + "num_input_tokens_seen": 967131584, + "step": 9975 + }, + { + "epoch": 0.9752639812280016, + "loss": 0.08617483079433441, + "loss_ce": 0.0012901831651106477, + "loss_iou": 0.267578125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 967131584, + "step": 9975 + }, + { + "epoch": 0.9753617520531873, + "grad_norm": 5.332060993222473, + "learning_rate": 5e-05, + "loss": 0.0599, + "num_input_tokens_seen": 967228968, + "step": 9976 + }, + { + "epoch": 0.9753617520531873, + "loss": 0.06613554060459137, + "loss_ce": 0.004932542331516743, + "loss_iou": 0.28515625, + "loss_num": 0.01220703125, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 967228968, + "step": 9976 + }, + { + "epoch": 0.9754595228783731, + "grad_norm": 2.753193861168007, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 967325860, + "step": 9977 + }, + { + "epoch": 0.9754595228783731, + "loss": 0.11936886608600616, + "loss_ce": 0.0019982580561190844, + "loss_iou": 0.322265625, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 967325860, + "step": 9977 + }, + { + "epoch": 0.9755572937035588, + "grad_norm": 6.9482108580592294, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 967422868, + "step": 9978 + }, + { + "epoch": 0.9755572937035588, + "loss": 0.06846533715724945, + "loss_ce": 0.00440130615606904, + "loss_iou": 0.25, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 967422868, + "step": 9978 + }, + { + "epoch": 0.9756550645287446, + "grad_norm": 11.48707415929737, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 967519116, + "step": 9979 + }, + { + "epoch": 0.9756550645287446, + "loss": 0.08690369874238968, + "loss_ce": 0.005404596216976643, + "loss_iou": 0.193359375, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 967519116, + "step": 9979 + }, + { + "epoch": 0.9757528353539304, + "grad_norm": 11.918398175708036, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 967617044, + "step": 9980 + }, + { + "epoch": 0.9757528353539304, + "loss": 0.06404197216033936, + "loss_ce": 0.007447126787155867, + "loss_iou": 0.26953125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 967617044, + "step": 9980 + }, + { + "epoch": 0.9758506061791161, + "grad_norm": 7.585463229034606, + "learning_rate": 5e-05, + "loss": 0.0607, + "num_input_tokens_seen": 967714284, + "step": 9981 + }, + { + "epoch": 0.9758506061791161, + "loss": 0.06962696462869644, + "loss_ce": 0.007050672546029091, + "loss_iou": 0.318359375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 967714284, + "step": 9981 + }, + { + "epoch": 0.9759483770043019, + "grad_norm": 4.470645280205999, + "learning_rate": 5e-05, + "loss": 0.0437, + "num_input_tokens_seen": 967811248, + "step": 9982 + }, + { + "epoch": 0.9759483770043019, + "loss": 0.06103377789258957, + "loss_ce": 0.003386074909940362, + "loss_iou": 0.384765625, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 967811248, + "step": 9982 + }, + { + "epoch": 0.9760461478294877, + "grad_norm": 12.848258097894195, + "learning_rate": 5e-05, + "loss": 0.0714, + "num_input_tokens_seen": 967907552, + "step": 9983 + }, + { + "epoch": 0.9760461478294877, + "loss": 0.08799317479133606, + "loss_ce": 0.002711805049329996, + "loss_iou": 0.224609375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 967907552, + "step": 9983 + }, + { + "epoch": 0.9761439186546734, + "grad_norm": 8.630145542313986, + "learning_rate": 5e-05, + "loss": 0.0596, + "num_input_tokens_seen": 968003700, + "step": 9984 + }, + { + "epoch": 0.9761439186546734, + "loss": 0.079627126455307, + "loss_ce": 0.01082524098455906, + "loss_iou": 0.265625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 968003700, + "step": 9984 + }, + { + "epoch": 0.9762416894798592, + "grad_norm": 7.302460492233928, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 968100152, + "step": 9985 + }, + { + "epoch": 0.9762416894798592, + "loss": 0.07601707428693771, + "loss_ce": 0.006681133061647415, + "loss_iou": 0.26953125, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 968100152, + "step": 9985 + }, + { + "epoch": 0.9763394603050449, + "grad_norm": 4.443862709859711, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 968197564, + "step": 9986 + }, + { + "epoch": 0.9763394603050449, + "loss": 0.07657493650913239, + "loss_ce": 0.007177966181188822, + "loss_iou": 0.2216796875, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 968197564, + "step": 9986 + }, + { + "epoch": 0.9764372311302307, + "grad_norm": 12.067147510762819, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 968294580, + "step": 9987 + }, + { + "epoch": 0.9764372311302307, + "loss": 0.04983397573232651, + "loss_ce": 0.004187305923551321, + "loss_iou": 0.201171875, + "loss_num": 0.00909423828125, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 968294580, + "step": 9987 + }, + { + "epoch": 0.9765350019554165, + "grad_norm": 5.799832841317043, + "learning_rate": 5e-05, + "loss": 0.0591, + "num_input_tokens_seen": 968391552, + "step": 9988 + }, + { + "epoch": 0.9765350019554165, + "loss": 0.05241423472762108, + "loss_ce": 0.003490741364657879, + "loss_iou": 0.2578125, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 968391552, + "step": 9988 + }, + { + "epoch": 0.9766327727806022, + "grad_norm": 8.101755074247471, + "learning_rate": 5e-05, + "loss": 0.0981, + "num_input_tokens_seen": 968488684, + "step": 9989 + }, + { + "epoch": 0.9766327727806022, + "loss": 0.11178925633430481, + "loss_ce": 0.007602248340845108, + "loss_iou": 0.3203125, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 968488684, + "step": 9989 + }, + { + "epoch": 0.976730543605788, + "grad_norm": 14.508020089578059, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 968585808, + "step": 9990 + }, + { + "epoch": 0.976730543605788, + "loss": 0.06073405593633652, + "loss_ce": 0.002720136661082506, + "loss_iou": 0.37109375, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 968585808, + "step": 9990 + }, + { + "epoch": 0.9768283144309738, + "grad_norm": 11.396434465206134, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 968682460, + "step": 9991 + }, + { + "epoch": 0.9768283144309738, + "loss": 0.06538759171962738, + "loss_ce": 0.004260885529220104, + "loss_iou": 0.255859375, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 968682460, + "step": 9991 + }, + { + "epoch": 0.9769260852561595, + "grad_norm": 8.064153403192085, + "learning_rate": 5e-05, + "loss": 0.0626, + "num_input_tokens_seen": 968779332, + "step": 9992 + }, + { + "epoch": 0.9769260852561595, + "loss": 0.07292291522026062, + "loss_ce": 0.007462708279490471, + "loss_iou": 0.318359375, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 968779332, + "step": 9992 + }, + { + "epoch": 0.9770238560813453, + "grad_norm": 7.493891606102689, + "learning_rate": 5e-05, + "loss": 0.0572, + "num_input_tokens_seen": 968876064, + "step": 9993 + }, + { + "epoch": 0.9770238560813453, + "loss": 0.052536725997924805, + "loss_ce": 0.005982163827866316, + "loss_iou": 0.232421875, + "loss_num": 0.00927734375, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 968876064, + "step": 9993 + }, + { + "epoch": 0.977121626906531, + "grad_norm": 7.240885097850195, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 968973376, + "step": 9994 + }, + { + "epoch": 0.977121626906531, + "loss": 0.06311318278312683, + "loss_ce": 0.0038785641081631184, + "loss_iou": 0.255859375, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 968973376, + "step": 9994 + }, + { + "epoch": 0.9772193977317168, + "grad_norm": 3.3040691417091796, + "learning_rate": 5e-05, + "loss": 0.0714, + "num_input_tokens_seen": 969069840, + "step": 9995 + }, + { + "epoch": 0.9772193977317168, + "loss": 0.09135295450687408, + "loss_ce": 0.006811628583818674, + "loss_iou": 0.2373046875, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 969069840, + "step": 9995 + }, + { + "epoch": 0.9773171685569026, + "grad_norm": 7.6471085106499785, + "learning_rate": 5e-05, + "loss": 0.1154, + "num_input_tokens_seen": 969167460, + "step": 9996 + }, + { + "epoch": 0.9773171685569026, + "loss": 0.07968077063560486, + "loss_ce": 0.003722515655681491, + "loss_iou": 0.3203125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 969167460, + "step": 9996 + }, + { + "epoch": 0.9774149393820883, + "grad_norm": 18.20652837536616, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 969264520, + "step": 9997 + }, + { + "epoch": 0.9774149393820883, + "loss": 0.07794298231601715, + "loss_ce": 0.007447374053299427, + "loss_iou": 0.3203125, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 969264520, + "step": 9997 + }, + { + "epoch": 0.9775127102072741, + "grad_norm": 7.842633607353922, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 969361480, + "step": 9998 + }, + { + "epoch": 0.9775127102072741, + "loss": 0.0834752768278122, + "loss_ce": 0.0044347550719976425, + "loss_iou": 0.33984375, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 969361480, + "step": 9998 + }, + { + "epoch": 0.97761048103246, + "grad_norm": 8.599725047515653, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 969458844, + "step": 9999 + }, + { + "epoch": 0.97761048103246, + "loss": 0.06336678564548492, + "loss_ce": 0.0056961942464113235, + "loss_iou": 0.27734375, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 969458844, + "step": 9999 + }, + { + "epoch": 0.9777082518576456, + "grad_norm": 4.923914967303662, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 969556836, + "step": 10000 + }, + { + "epoch": 0.9777082518576456, + "eval_seeclick_CIoU": 0.5806281566619873, + "eval_seeclick_GIoU": 0.5896742641925812, + "eval_seeclick_IoU": 0.6146319210529327, + "eval_seeclick_MAE_all": 0.06162789277732372, + "eval_seeclick_MAE_h": 0.039920879527926445, + "eval_seeclick_MAE_w": 0.08496637269854546, + "eval_seeclick_MAE_x": 0.08761386945843697, + "eval_seeclick_MAE_y": 0.03401045594364405, + "eval_seeclick_NUM_probability": 0.9999979734420776, + "eval_seeclick_inside_bbox": 0.8153409063816071, + "eval_seeclick_loss": 0.2432640939950943, + "eval_seeclick_loss_ce": 0.01011788658797741, + "eval_seeclick_loss_iou": 0.409423828125, + "eval_seeclick_loss_num": 0.049533843994140625, + "eval_seeclick_loss_xval": 0.247833251953125, + "eval_seeclick_runtime": 76.3152, + "eval_seeclick_samples_per_second": 0.563, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 969556836, + "step": 10000 + }, + { + "epoch": 0.9777082518576456, + "eval_icons_CIoU": 0.6897314786911011, + "eval_icons_GIoU": 0.6982936263084412, + "eval_icons_IoU": 0.7121067345142365, + "eval_icons_MAE_all": 0.04913754761219025, + "eval_icons_MAE_h": 0.04712135158479214, + "eval_icons_MAE_w": 0.05394958145916462, + "eval_icons_MAE_x": 0.05193592049181461, + "eval_icons_MAE_y": 0.043543324805796146, + "eval_icons_NUM_probability": 0.9999995827674866, + "eval_icons_inside_bbox": 0.8628472089767456, + "eval_icons_loss": 0.15722689032554626, + "eval_icons_loss_ce": 3.502988903392179e-07, + "eval_icons_loss_iou": 0.3839111328125, + "eval_icons_loss_num": 0.034854888916015625, + "eval_icons_loss_xval": 0.1742095947265625, + "eval_icons_runtime": 86.0513, + "eval_icons_samples_per_second": 0.581, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 969556836, + "step": 10000 + }, + { + "epoch": 0.9777082518576456, + "eval_screenspot_CIoU": 0.26980967354029417, + "eval_screenspot_GIoU": 0.23846702029307684, + "eval_screenspot_IoU": 0.37219689786434174, + "eval_screenspot_MAE_all": 0.1765059530735016, + "eval_screenspot_MAE_h": 0.14009813716014227, + "eval_screenspot_MAE_w": 0.20116404443979263, + "eval_screenspot_MAE_x": 0.22104636331399283, + "eval_screenspot_MAE_y": 0.14371528228123984, + "eval_screenspot_NUM_probability": 0.9999805490175883, + "eval_screenspot_inside_bbox": 0.5691666603088379, + "eval_screenspot_loss": 0.6071813702583313, + "eval_screenspot_loss_ce": 0.022019679347674053, + "eval_screenspot_loss_iou": 0.2923583984375, + "eval_screenspot_loss_num": 0.12309773763020833, + "eval_screenspot_loss_xval": 0.615234375, + "eval_screenspot_runtime": 151.0868, + "eval_screenspot_samples_per_second": 0.589, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 969556836, + "step": 10000 + }, + { + "epoch": 0.9777082518576456, + "eval_compot_CIoU": 0.49806012213230133, + "eval_compot_GIoU": 0.488751620054245, + "eval_compot_IoU": 0.5511197447776794, + "eval_compot_MAE_all": 0.08279669843614101, + "eval_compot_MAE_h": 0.06934069469571114, + "eval_compot_MAE_w": 0.09881719388067722, + "eval_compot_MAE_x": 0.09071443974971771, + "eval_compot_MAE_y": 0.07231445983052254, + "eval_compot_NUM_probability": 0.9999934732913971, + "eval_compot_inside_bbox": 0.7760416567325592, + "eval_compot_loss": 0.3013363480567932, + "eval_compot_loss_ce": 0.026450689882040024, + "eval_compot_loss_iou": 0.4736328125, + "eval_compot_loss_num": 0.04724884033203125, + "eval_compot_loss_xval": 0.2362213134765625, + "eval_compot_runtime": 86.2232, + "eval_compot_samples_per_second": 0.58, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 969556836, + "step": 10000 + } + ], + "logging_steps": 1.0, + "max_steps": 10228, + "num_input_tokens_seen": 969556836, + "num_train_epochs": 1, + "save_steps": 250, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6010141228924928.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}