{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.655574043261232, "eval_steps": 250, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0033277870216306157, "grad_norm": 99.72433471679688, "learning_rate": 5e-06, "loss": 2.2579, "num_input_tokens_seen": 62524, "step": 1 }, { "epoch": 0.0033277870216306157, "loss": 2.4882845878601074, "loss_ce": 0.4775424301624298, "loss_iou": 0.55859375, "loss_num": 0.177734375, "loss_xval": 2.015625, "num_input_tokens_seen": 62524, "step": 1 }, { "epoch": 0.0066555740432612314, "grad_norm": 43.31109619140625, "learning_rate": 5e-06, "loss": 1.8354, "num_input_tokens_seen": 124728, "step": 2 }, { "epoch": 0.0066555740432612314, "loss": 1.5098202228546143, "loss_ce": 0.0459531769156456, "loss_iou": 0.51953125, "loss_num": 0.0849609375, "loss_xval": 1.4609375, "num_input_tokens_seen": 124728, "step": 2 }, { "epoch": 0.009983361064891847, "grad_norm": 40.23197555541992, "learning_rate": 5e-06, "loss": 1.8923, "num_input_tokens_seen": 187516, "step": 3 }, { "epoch": 0.009983361064891847, "loss": 1.9805556535720825, "loss_ce": 0.46248918771743774, "loss_iou": 0.4375, "loss_num": 0.12890625, "loss_xval": 1.515625, "num_input_tokens_seen": 187516, "step": 3 }, { "epoch": 0.013311148086522463, "grad_norm": 41.29404067993164, "learning_rate": 5e-06, "loss": 1.8433, "num_input_tokens_seen": 249256, "step": 4 }, { "epoch": 0.013311148086522463, "loss": 1.760237216949463, "loss_ce": 0.29978805780410767, "loss_iou": 0.474609375, "loss_num": 0.10205078125, "loss_xval": 1.4609375, "num_input_tokens_seen": 249256, "step": 4 }, { "epoch": 0.016638935108153077, "grad_norm": 45.62328338623047, "learning_rate": 5e-06, "loss": 1.9853, "num_input_tokens_seen": 312896, "step": 5 }, { "epoch": 0.016638935108153077, "loss": 1.6446335315704346, "loss_ce": 0.36777815222740173, "loss_iou": 0.38671875, "loss_num": 0.1005859375, "loss_xval": 1.2734375, "num_input_tokens_seen": 312896, "step": 5 }, { "epoch": 0.019966722129783693, "grad_norm": 26.73392677307129, "learning_rate": 5e-06, "loss": 1.5267, "num_input_tokens_seen": 373800, "step": 6 }, { "epoch": 0.019966722129783693, "loss": 1.481225848197937, "loss_ce": 0.186303973197937, "loss_iou": 0.453125, "loss_num": 0.0771484375, "loss_xval": 1.296875, "num_input_tokens_seen": 373800, "step": 6 }, { "epoch": 0.02329450915141431, "grad_norm": 28.28041648864746, "learning_rate": 5e-06, "loss": 1.8401, "num_input_tokens_seen": 435532, "step": 7 }, { "epoch": 0.02329450915141431, "loss": 1.7152678966522217, "loss_ce": 0.2172209620475769, "loss_iou": 0.3515625, "loss_num": 0.158203125, "loss_xval": 1.5, "num_input_tokens_seen": 435532, "step": 7 }, { "epoch": 0.026622296173044926, "grad_norm": 93.13752746582031, "learning_rate": 5e-06, "loss": 1.7189, "num_input_tokens_seen": 498240, "step": 8 }, { "epoch": 0.026622296173044926, "loss": 1.7084490060806274, "loss_ce": 0.12007011473178864, "loss_iou": 0.49609375, "loss_num": 0.11865234375, "loss_xval": 1.5859375, "num_input_tokens_seen": 498240, "step": 8 }, { "epoch": 0.029950083194675542, "grad_norm": 55.754974365234375, "learning_rate": 5e-06, "loss": 1.9588, "num_input_tokens_seen": 560944, "step": 9 }, { "epoch": 0.029950083194675542, "loss": 1.9288438558578491, "loss_ce": 0.43592390418052673, "loss_iou": 0.376953125, "loss_num": 0.1474609375, "loss_xval": 1.4921875, "num_input_tokens_seen": 560944, "step": 9 }, { "epoch": 0.033277870216306155, "grad_norm": 24.77700424194336, "learning_rate": 5e-06, "loss": 1.7834, "num_input_tokens_seen": 623276, "step": 10 }, { "epoch": 0.033277870216306155, "loss": 1.2600505352020264, "loss_ce": 0.17655442655086517, "loss_iou": 0.169921875, "loss_num": 0.1494140625, "loss_xval": 1.0859375, "num_input_tokens_seen": 623276, "step": 10 }, { "epoch": 0.036605657237936774, "grad_norm": 53.40562438964844, "learning_rate": 5e-06, "loss": 1.9654, "num_input_tokens_seen": 686344, "step": 11 }, { "epoch": 0.036605657237936774, "loss": 1.5656721591949463, "loss_ce": 0.15307454764842987, "loss_iou": 0.32421875, "loss_num": 0.15234375, "loss_xval": 1.4140625, "num_input_tokens_seen": 686344, "step": 11 }, { "epoch": 0.03993344425956739, "grad_norm": 185.1483612060547, "learning_rate": 5e-06, "loss": 1.5234, "num_input_tokens_seen": 748796, "step": 12 }, { "epoch": 0.03993344425956739, "loss": 0.9671105146408081, "loss_ce": 0.1262902468442917, "loss_iou": 0.0, "loss_num": 0.16796875, "loss_xval": 0.83984375, "num_input_tokens_seen": 748796, "step": 12 }, { "epoch": 0.04326123128119801, "grad_norm": 41.721710205078125, "learning_rate": 5e-06, "loss": 2.2144, "num_input_tokens_seen": 811248, "step": 13 }, { "epoch": 0.04326123128119801, "loss": 1.7121745347976685, "loss_ce": 0.382096529006958, "loss_iou": 0.34765625, "loss_num": 0.126953125, "loss_xval": 1.328125, "num_input_tokens_seen": 811248, "step": 13 }, { "epoch": 0.04658901830282862, "grad_norm": 26.450149536132812, "learning_rate": 5e-06, "loss": 2.0257, "num_input_tokens_seen": 874336, "step": 14 }, { "epoch": 0.04658901830282862, "loss": 2.167698383331299, "loss_ce": 0.07883133739233017, "loss_iou": 0.65234375, "loss_num": 0.15625, "loss_xval": 2.09375, "num_input_tokens_seen": 874336, "step": 14 }, { "epoch": 0.04991680532445923, "grad_norm": 28.79014015197754, "learning_rate": 5e-06, "loss": 1.5748, "num_input_tokens_seen": 934104, "step": 15 }, { "epoch": 0.04991680532445923, "loss": 1.746118426322937, "loss_ce": 0.3081301152706146, "loss_iou": 0.34375, "loss_num": 0.150390625, "loss_xval": 1.4375, "num_input_tokens_seen": 934104, "step": 15 }, { "epoch": 0.05324459234608985, "grad_norm": 35.701480865478516, "learning_rate": 5e-06, "loss": 1.6555, "num_input_tokens_seen": 995740, "step": 16 }, { "epoch": 0.05324459234608985, "loss": 1.4585609436035156, "loss_ce": 0.164127379655838, "loss_iou": 0.318359375, "loss_num": 0.1318359375, "loss_xval": 1.296875, "num_input_tokens_seen": 995740, "step": 16 }, { "epoch": 0.056572379367720464, "grad_norm": 31.904708862304688, "learning_rate": 5e-06, "loss": 1.4605, "num_input_tokens_seen": 1057552, "step": 17 }, { "epoch": 0.056572379367720464, "loss": 1.500415563583374, "loss_ce": 0.4437748193740845, "loss_iou": 0.26171875, "loss_num": 0.10693359375, "loss_xval": 1.0546875, "num_input_tokens_seen": 1057552, "step": 17 }, { "epoch": 0.059900166389351084, "grad_norm": 26.225719451904297, "learning_rate": 5e-06, "loss": 1.8686, "num_input_tokens_seen": 1117304, "step": 18 }, { "epoch": 0.059900166389351084, "loss": 2.0437541007995605, "loss_ce": 0.25664472579956055, "loss_iou": 0.546875, "loss_num": 0.1376953125, "loss_xval": 1.7890625, "num_input_tokens_seen": 1117304, "step": 18 }, { "epoch": 0.0632279534109817, "grad_norm": 107.4391098022461, "learning_rate": 5e-06, "loss": 1.8643, "num_input_tokens_seen": 1180152, "step": 19 }, { "epoch": 0.0632279534109817, "loss": 1.5854601860046387, "loss_ce": 0.12549912929534912, "loss_iou": 0.359375, "loss_num": 0.1484375, "loss_xval": 1.4609375, "num_input_tokens_seen": 1180152, "step": 19 }, { "epoch": 0.06655574043261231, "grad_norm": 31.948070526123047, "learning_rate": 5e-06, "loss": 1.6704, "num_input_tokens_seen": 1240584, "step": 20 }, { "epoch": 0.06655574043261231, "loss": 1.782106876373291, "loss_ce": 0.6380637884140015, "loss_iou": 0.203125, "loss_num": 0.1474609375, "loss_xval": 1.140625, "num_input_tokens_seen": 1240584, "step": 20 }, { "epoch": 0.06988352745424292, "grad_norm": 39.34145736694336, "learning_rate": 5e-06, "loss": 1.8904, "num_input_tokens_seen": 1301508, "step": 21 }, { "epoch": 0.06988352745424292, "loss": 1.6883811950683594, "loss_ce": 0.6380882263183594, "loss_iou": 0.2470703125, "loss_num": 0.111328125, "loss_xval": 1.046875, "num_input_tokens_seen": 1301508, "step": 21 }, { "epoch": 0.07321131447587355, "grad_norm": 28.05674934387207, "learning_rate": 5e-06, "loss": 2.0444, "num_input_tokens_seen": 1363652, "step": 22 }, { "epoch": 0.07321131447587355, "loss": 2.08927845954895, "loss_ce": 0.1547081470489502, "loss_iou": 0.5625, "loss_num": 0.162109375, "loss_xval": 1.9375, "num_input_tokens_seen": 1363652, "step": 22 }, { "epoch": 0.07653910149750416, "grad_norm": 34.66334915161133, "learning_rate": 5e-06, "loss": 2.0099, "num_input_tokens_seen": 1427244, "step": 23 }, { "epoch": 0.07653910149750416, "loss": 2.1946048736572266, "loss_ce": 0.20241737365722656, "loss_iou": 0.640625, "loss_num": 0.142578125, "loss_xval": 1.9921875, "num_input_tokens_seen": 1427244, "step": 23 }, { "epoch": 0.07986688851913477, "grad_norm": 16.318044662475586, "learning_rate": 5e-06, "loss": 1.6373, "num_input_tokens_seen": 1488232, "step": 24 }, { "epoch": 0.07986688851913477, "loss": 1.2525696754455566, "loss_ce": 0.21619272232055664, "loss_iou": 0.251953125, "loss_num": 0.1064453125, "loss_xval": 1.0390625, "num_input_tokens_seen": 1488232, "step": 24 }, { "epoch": 0.08319467554076539, "grad_norm": 22.026212692260742, "learning_rate": 5e-06, "loss": 1.8012, "num_input_tokens_seen": 1549668, "step": 25 }, { "epoch": 0.08319467554076539, "loss": 1.860095500946045, "loss_ce": 0.44945091009140015, "loss_iou": 0.30859375, "loss_num": 0.1591796875, "loss_xval": 1.4140625, "num_input_tokens_seen": 1549668, "step": 25 }, { "epoch": 0.08652246256239601, "grad_norm": 26.607149124145508, "learning_rate": 5e-06, "loss": 1.898, "num_input_tokens_seen": 1612748, "step": 26 }, { "epoch": 0.08652246256239601, "loss": 1.7494425773620605, "loss_ce": 0.19084876775741577, "loss_iou": 0.4375, "loss_num": 0.13671875, "loss_xval": 1.5625, "num_input_tokens_seen": 1612748, "step": 26 }, { "epoch": 0.08985024958402663, "grad_norm": 100.14049530029297, "learning_rate": 5e-06, "loss": 1.9263, "num_input_tokens_seen": 1675456, "step": 27 }, { "epoch": 0.08985024958402663, "loss": 1.7160944938659668, "loss_ce": 0.20193445682525635, "loss_iou": 0.5078125, "loss_num": 0.099609375, "loss_xval": 1.515625, "num_input_tokens_seen": 1675456, "step": 27 }, { "epoch": 0.09317803660565724, "grad_norm": 31.630752563476562, "learning_rate": 5e-06, "loss": 2.0261, "num_input_tokens_seen": 1736840, "step": 28 }, { "epoch": 0.09317803660565724, "loss": 1.7389755249023438, "loss_ce": 0.17842870950698853, "loss_iou": 0.42578125, "loss_num": 0.1416015625, "loss_xval": 1.5625, "num_input_tokens_seen": 1736840, "step": 28 }, { "epoch": 0.09650582362728785, "grad_norm": 23.318147659301758, "learning_rate": 5e-06, "loss": 1.7689, "num_input_tokens_seen": 1799508, "step": 29 }, { "epoch": 0.09650582362728785, "loss": 1.8038142919540405, "loss_ce": 0.46494707465171814, "loss_iou": 0.357421875, "loss_num": 0.12451171875, "loss_xval": 1.3359375, "num_input_tokens_seen": 1799508, "step": 29 }, { "epoch": 0.09983361064891846, "grad_norm": 20.128877639770508, "learning_rate": 5e-06, "loss": 2.0295, "num_input_tokens_seen": 1862340, "step": 30 }, { "epoch": 0.09983361064891846, "loss": 2.387416362762451, "loss_ce": 0.5177874565124512, "loss_iou": 0.53125, "loss_num": 0.162109375, "loss_xval": 1.8671875, "num_input_tokens_seen": 1862340, "step": 30 }, { "epoch": 0.10316139767054909, "grad_norm": 102.55489349365234, "learning_rate": 5e-06, "loss": 1.695, "num_input_tokens_seen": 1926308, "step": 31 }, { "epoch": 0.10316139767054909, "loss": 1.4592796564102173, "loss_ce": 0.12383048981428146, "loss_iou": 0.447265625, "loss_num": 0.0888671875, "loss_xval": 1.3359375, "num_input_tokens_seen": 1926308, "step": 31 }, { "epoch": 0.1064891846921797, "grad_norm": 24.122255325317383, "learning_rate": 5e-06, "loss": 1.5715, "num_input_tokens_seen": 1987296, "step": 32 }, { "epoch": 0.1064891846921797, "loss": 1.2685531377792358, "loss_ce": 0.11279135942459106, "loss_iou": 0.1845703125, "loss_num": 0.1572265625, "loss_xval": 1.15625, "num_input_tokens_seen": 1987296, "step": 32 }, { "epoch": 0.10981697171381032, "grad_norm": 29.521120071411133, "learning_rate": 5e-06, "loss": 1.9234, "num_input_tokens_seen": 2049684, "step": 33 }, { "epoch": 0.10981697171381032, "loss": 2.4738051891326904, "loss_ce": 0.3732193112373352, "loss_iou": 0.62109375, "loss_num": 0.171875, "loss_xval": 2.09375, "num_input_tokens_seen": 2049684, "step": 33 }, { "epoch": 0.11314475873544093, "grad_norm": 33.83547592163086, "learning_rate": 5e-06, "loss": 2.2031, "num_input_tokens_seen": 2112524, "step": 34 }, { "epoch": 0.11314475873544093, "loss": 2.4939706325531006, "loss_ce": 0.40705662965774536, "loss_iou": 0.63671875, "loss_num": 0.162109375, "loss_xval": 2.09375, "num_input_tokens_seen": 2112524, "step": 34 }, { "epoch": 0.11647254575707154, "grad_norm": 28.041799545288086, "learning_rate": 5e-06, "loss": 1.9234, "num_input_tokens_seen": 2174764, "step": 35 }, { "epoch": 0.11647254575707154, "loss": 2.1027140617370605, "loss_ce": 0.321463942527771, "loss_iou": 0.404296875, "loss_num": 0.1953125, "loss_xval": 1.78125, "num_input_tokens_seen": 2174764, "step": 35 }, { "epoch": 0.11980033277870217, "grad_norm": 16.05351448059082, "learning_rate": 5e-06, "loss": 1.6234, "num_input_tokens_seen": 2236936, "step": 36 }, { "epoch": 0.11980033277870217, "loss": 1.9183712005615234, "loss_ce": 0.2826289236545563, "loss_iou": 0.53515625, "loss_num": 0.11376953125, "loss_xval": 1.6328125, "num_input_tokens_seen": 2236936, "step": 36 }, { "epoch": 0.12312811980033278, "grad_norm": 25.214027404785156, "learning_rate": 5e-06, "loss": 1.7429, "num_input_tokens_seen": 2297968, "step": 37 }, { "epoch": 0.12312811980033278, "loss": 1.8933910131454468, "loss_ce": 0.352375328540802, "loss_iou": 0.453125, "loss_num": 0.126953125, "loss_xval": 1.5390625, "num_input_tokens_seen": 2297968, "step": 37 }, { "epoch": 0.1264559068219634, "grad_norm": 41.4306526184082, "learning_rate": 5e-06, "loss": 1.9087, "num_input_tokens_seen": 2359480, "step": 38 }, { "epoch": 0.1264559068219634, "loss": 1.9372410774230957, "loss_ce": 0.3669285476207733, "loss_iou": 0.44921875, "loss_num": 0.134765625, "loss_xval": 1.5703125, "num_input_tokens_seen": 2359480, "step": 38 }, { "epoch": 0.129783693843594, "grad_norm": 87.06895446777344, "learning_rate": 5e-06, "loss": 1.4617, "num_input_tokens_seen": 2422388, "step": 39 }, { "epoch": 0.129783693843594, "loss": 1.5843279361724854, "loss_ce": 0.26157402992248535, "loss_iou": 0.408203125, "loss_num": 0.10107421875, "loss_xval": 1.3203125, "num_input_tokens_seen": 2422388, "step": 39 }, { "epoch": 0.13311148086522462, "grad_norm": 22.859128952026367, "learning_rate": 5e-06, "loss": 1.7171, "num_input_tokens_seen": 2485508, "step": 40 }, { "epoch": 0.13311148086522462, "loss": 1.6260865926742554, "loss_ce": 0.1578247845172882, "loss_iou": 0.376953125, "loss_num": 0.142578125, "loss_xval": 1.46875, "num_input_tokens_seen": 2485508, "step": 40 }, { "epoch": 0.13643926788685523, "grad_norm": 29.294437408447266, "learning_rate": 5e-06, "loss": 1.6604, "num_input_tokens_seen": 2547740, "step": 41 }, { "epoch": 0.13643926788685523, "loss": 1.5736039876937866, "loss_ce": 0.11828167736530304, "loss_iou": 0.384765625, "loss_num": 0.1376953125, "loss_xval": 1.453125, "num_input_tokens_seen": 2547740, "step": 41 }, { "epoch": 0.13976705490848584, "grad_norm": 22.002099990844727, "learning_rate": 5e-06, "loss": 1.9157, "num_input_tokens_seen": 2610656, "step": 42 }, { "epoch": 0.13976705490848584, "loss": 2.026005744934082, "loss_ce": 0.26331043243408203, "loss_iou": 0.515625, "loss_num": 0.146484375, "loss_xval": 1.765625, "num_input_tokens_seen": 2610656, "step": 42 }, { "epoch": 0.14309484193011648, "grad_norm": 42.256507873535156, "learning_rate": 5e-06, "loss": 1.9787, "num_input_tokens_seen": 2671692, "step": 43 }, { "epoch": 0.14309484193011648, "loss": 1.8853733539581299, "loss_ce": 0.2652561664581299, "loss_iou": 0.50390625, "loss_num": 0.12158203125, "loss_xval": 1.6171875, "num_input_tokens_seen": 2671692, "step": 43 }, { "epoch": 0.1464226289517471, "grad_norm": 23.313539505004883, "learning_rate": 5e-06, "loss": 1.97, "num_input_tokens_seen": 2734876, "step": 44 }, { "epoch": 0.1464226289517471, "loss": 2.3043510913848877, "loss_ce": 0.4186089336872101, "loss_iou": 0.58203125, "loss_num": 0.1435546875, "loss_xval": 1.8828125, "num_input_tokens_seen": 2734876, "step": 44 }, { "epoch": 0.1497504159733777, "grad_norm": 19.345558166503906, "learning_rate": 5e-06, "loss": 1.9084, "num_input_tokens_seen": 2798924, "step": 45 }, { "epoch": 0.1497504159733777, "loss": 1.7593090534210205, "loss_ce": 0.1675121784210205, "loss_iou": 0.50390625, "loss_num": 0.1171875, "loss_xval": 1.59375, "num_input_tokens_seen": 2798924, "step": 45 }, { "epoch": 0.15307820299500832, "grad_norm": 56.2034912109375, "learning_rate": 5e-06, "loss": 1.5545, "num_input_tokens_seen": 2862396, "step": 46 }, { "epoch": 0.15307820299500832, "loss": 1.1661244630813599, "loss_ce": 0.09337049722671509, "loss_iou": 0.306640625, "loss_num": 0.091796875, "loss_xval": 1.0703125, "num_input_tokens_seen": 2862396, "step": 46 }, { "epoch": 0.15640599001663893, "grad_norm": 15.956643104553223, "learning_rate": 5e-06, "loss": 1.6658, "num_input_tokens_seen": 2926024, "step": 47 }, { "epoch": 0.15640599001663893, "loss": 1.5099748373031616, "loss_ce": 0.054896753281354904, "loss_iou": 0.50390625, "loss_num": 0.08984375, "loss_xval": 1.453125, "num_input_tokens_seen": 2926024, "step": 47 }, { "epoch": 0.15973377703826955, "grad_norm": 25.402143478393555, "learning_rate": 5e-06, "loss": 1.8769, "num_input_tokens_seen": 2989532, "step": 48 }, { "epoch": 0.15973377703826955, "loss": 1.6056416034698486, "loss_ce": 0.17790716886520386, "loss_iou": 0.44140625, "loss_num": 0.10888671875, "loss_xval": 1.4296875, "num_input_tokens_seen": 2989532, "step": 48 }, { "epoch": 0.16306156405990016, "grad_norm": 26.91611671447754, "learning_rate": 5e-06, "loss": 1.8648, "num_input_tokens_seen": 3052660, "step": 49 }, { "epoch": 0.16306156405990016, "loss": 1.767176866531372, "loss_ce": 0.14803630113601685, "loss_iou": 0.515625, "loss_num": 0.1181640625, "loss_xval": 1.6171875, "num_input_tokens_seen": 3052660, "step": 49 }, { "epoch": 0.16638935108153077, "grad_norm": 17.735488891601562, "learning_rate": 5e-06, "loss": 1.733, "num_input_tokens_seen": 3114048, "step": 50 }, { "epoch": 0.16638935108153077, "loss": 1.8018484115600586, "loss_ce": 0.201262429356575, "loss_iou": 0.369140625, "loss_num": 0.171875, "loss_xval": 1.6015625, "num_input_tokens_seen": 3114048, "step": 50 }, { "epoch": 0.16971713810316139, "grad_norm": 23.010356903076172, "learning_rate": 5e-06, "loss": 1.7252, "num_input_tokens_seen": 3177028, "step": 51 }, { "epoch": 0.16971713810316139, "loss": 1.9260194301605225, "loss_ce": 0.23461312055587769, "loss_iou": 0.5390625, "loss_num": 0.1220703125, "loss_xval": 1.6875, "num_input_tokens_seen": 3177028, "step": 51 }, { "epoch": 0.17304492512479203, "grad_norm": 50.80724334716797, "learning_rate": 5e-06, "loss": 1.5932, "num_input_tokens_seen": 3240048, "step": 52 }, { "epoch": 0.17304492512479203, "loss": 1.2517362833023071, "loss_ce": 0.07107216864824295, "loss_iou": 0.3046875, "loss_num": 0.1142578125, "loss_xval": 1.1796875, "num_input_tokens_seen": 3240048, "step": 52 }, { "epoch": 0.17637271214642264, "grad_norm": 19.88364601135254, "learning_rate": 5e-06, "loss": 1.7949, "num_input_tokens_seen": 3302084, "step": 53 }, { "epoch": 0.17637271214642264, "loss": 1.9920969009399414, "loss_ce": 0.1786203682422638, "loss_iou": 0.5703125, "loss_num": 0.1337890625, "loss_xval": 1.8125, "num_input_tokens_seen": 3302084, "step": 53 }, { "epoch": 0.17970049916805325, "grad_norm": 15.960768699645996, "learning_rate": 5e-06, "loss": 1.8404, "num_input_tokens_seen": 3364032, "step": 54 }, { "epoch": 0.17970049916805325, "loss": 1.923379898071289, "loss_ce": 0.3872470259666443, "loss_iou": 0.455078125, "loss_num": 0.125, "loss_xval": 1.5390625, "num_input_tokens_seen": 3364032, "step": 54 }, { "epoch": 0.18302828618968386, "grad_norm": 34.17069625854492, "learning_rate": 5e-06, "loss": 1.6571, "num_input_tokens_seen": 3426944, "step": 55 }, { "epoch": 0.18302828618968386, "loss": 1.422314167022705, "loss_ce": 0.04633765295147896, "loss_iou": 0.37109375, "loss_num": 0.126953125, "loss_xval": 1.375, "num_input_tokens_seen": 3426944, "step": 55 }, { "epoch": 0.18635607321131448, "grad_norm": 14.88204288482666, "learning_rate": 5e-06, "loss": 1.7171, "num_input_tokens_seen": 3488980, "step": 56 }, { "epoch": 0.18635607321131448, "loss": 1.6802122592926025, "loss_ce": 0.12698963284492493, "loss_iou": 0.46484375, "loss_num": 0.1240234375, "loss_xval": 1.5546875, "num_input_tokens_seen": 3488980, "step": 56 }, { "epoch": 0.1896838602329451, "grad_norm": 25.315814971923828, "learning_rate": 5e-06, "loss": 1.5849, "num_input_tokens_seen": 3552192, "step": 57 }, { "epoch": 0.1896838602329451, "loss": 1.5455249547958374, "loss_ce": 0.09777102619409561, "loss_iou": 0.4609375, "loss_num": 0.10498046875, "loss_xval": 1.4453125, "num_input_tokens_seen": 3552192, "step": 57 }, { "epoch": 0.1930116472545757, "grad_norm": 52.65762710571289, "learning_rate": 5e-06, "loss": 1.7184, "num_input_tokens_seen": 3614784, "step": 58 }, { "epoch": 0.1930116472545757, "loss": 1.7533233165740967, "loss_ce": 0.1888701170682907, "loss_iou": 0.462890625, "loss_num": 0.1279296875, "loss_xval": 1.5625, "num_input_tokens_seen": 3614784, "step": 58 }, { "epoch": 0.19633943427620631, "grad_norm": 10.436413764953613, "learning_rate": 5e-06, "loss": 1.5083, "num_input_tokens_seen": 3676144, "step": 59 }, { "epoch": 0.19633943427620631, "loss": 1.4122167825698853, "loss_ce": 0.03331056609749794, "loss_iou": 0.3359375, "loss_num": 0.1416015625, "loss_xval": 1.375, "num_input_tokens_seen": 3676144, "step": 59 }, { "epoch": 0.19966722129783693, "grad_norm": 23.417434692382812, "learning_rate": 5e-06, "loss": 1.513, "num_input_tokens_seen": 3740152, "step": 60 }, { "epoch": 0.19966722129783693, "loss": 1.52932870388031, "loss_ce": 0.15628191828727722, "loss_iou": 0.4453125, "loss_num": 0.0966796875, "loss_xval": 1.375, "num_input_tokens_seen": 3740152, "step": 60 }, { "epoch": 0.20299500831946754, "grad_norm": 57.756534576416016, "learning_rate": 5e-06, "loss": 1.9057, "num_input_tokens_seen": 3803536, "step": 61 }, { "epoch": 0.20299500831946754, "loss": 2.161196708679199, "loss_ce": 0.07525897026062012, "loss_iou": 0.6328125, "loss_num": 0.1640625, "loss_xval": 2.09375, "num_input_tokens_seen": 3803536, "step": 61 }, { "epoch": 0.20632279534109818, "grad_norm": 34.947166442871094, "learning_rate": 5e-06, "loss": 1.5023, "num_input_tokens_seen": 3866352, "step": 62 }, { "epoch": 0.20632279534109818, "loss": 1.5223082304000854, "loss_ce": 0.03939810022711754, "loss_iou": 0.4453125, "loss_num": 0.11865234375, "loss_xval": 1.484375, "num_input_tokens_seen": 3866352, "step": 62 }, { "epoch": 0.2096505823627288, "grad_norm": 27.8328800201416, "learning_rate": 5e-06, "loss": 1.6887, "num_input_tokens_seen": 3929720, "step": 63 }, { "epoch": 0.2096505823627288, "loss": 1.6723523139953613, "loss_ce": 0.13377803564071655, "loss_iou": 0.43359375, "loss_num": 0.1337890625, "loss_xval": 1.5390625, "num_input_tokens_seen": 3929720, "step": 63 }, { "epoch": 0.2129783693843594, "grad_norm": 28.632810592651367, "learning_rate": 5e-06, "loss": 1.8225, "num_input_tokens_seen": 3993200, "step": 64 }, { "epoch": 0.2129783693843594, "loss": 2.061736583709717, "loss_ce": 0.2531428337097168, "loss_iou": 0.5703125, "loss_num": 0.1328125, "loss_xval": 1.8125, "num_input_tokens_seen": 3993200, "step": 64 }, { "epoch": 0.21630615640599002, "grad_norm": 26.3468074798584, "learning_rate": 5e-06, "loss": 1.5015, "num_input_tokens_seen": 4055156, "step": 65 }, { "epoch": 0.21630615640599002, "loss": 1.671527624130249, "loss_ce": 0.03480884060263634, "loss_iou": 0.470703125, "loss_num": 0.1396484375, "loss_xval": 1.640625, "num_input_tokens_seen": 4055156, "step": 65 }, { "epoch": 0.21963394342762063, "grad_norm": 28.5969295501709, "learning_rate": 5e-06, "loss": 1.621, "num_input_tokens_seen": 4117564, "step": 66 }, { "epoch": 0.21963394342762063, "loss": 1.391996145248413, "loss_ce": 0.03555082529783249, "loss_iou": 0.462890625, "loss_num": 0.08642578125, "loss_xval": 1.359375, "num_input_tokens_seen": 4117564, "step": 66 }, { "epoch": 0.22296173044925124, "grad_norm": 13.81509780883789, "learning_rate": 5e-06, "loss": 1.4978, "num_input_tokens_seen": 4181136, "step": 67 }, { "epoch": 0.22296173044925124, "loss": 1.482597827911377, "loss_ce": 0.14080099761486053, "loss_iou": 0.435546875, "loss_num": 0.09375, "loss_xval": 1.34375, "num_input_tokens_seen": 4181136, "step": 67 }, { "epoch": 0.22628951747088186, "grad_norm": 22.297697067260742, "learning_rate": 5e-06, "loss": 1.5529, "num_input_tokens_seen": 4244384, "step": 68 }, { "epoch": 0.22628951747088186, "loss": 1.800987720489502, "loss_ce": 0.0724719688296318, "loss_iou": 0.5, "loss_num": 0.1455078125, "loss_xval": 1.7265625, "num_input_tokens_seen": 4244384, "step": 68 }, { "epoch": 0.22961730449251247, "grad_norm": 17.73756217956543, "learning_rate": 5e-06, "loss": 1.6361, "num_input_tokens_seen": 4306480, "step": 69 }, { "epoch": 0.22961730449251247, "loss": 1.7959282398223877, "loss_ce": 0.25491267442703247, "loss_iou": 0.427734375, "loss_num": 0.1376953125, "loss_xval": 1.5390625, "num_input_tokens_seen": 4306480, "step": 69 }, { "epoch": 0.23294509151414308, "grad_norm": 28.785377502441406, "learning_rate": 5e-06, "loss": 1.6575, "num_input_tokens_seen": 4368812, "step": 70 }, { "epoch": 0.23294509151414308, "loss": 1.6283104419708252, "loss_ce": 0.05555645376443863, "loss_iou": 0.4375, "loss_num": 0.1396484375, "loss_xval": 1.5703125, "num_input_tokens_seen": 4368812, "step": 70 }, { "epoch": 0.23627287853577372, "grad_norm": 15.95328140258789, "learning_rate": 5e-06, "loss": 1.923, "num_input_tokens_seen": 4432212, "step": 71 }, { "epoch": 0.23627287853577372, "loss": 2.040701389312744, "loss_ce": 0.4274199604988098, "loss_iou": 0.53125, "loss_num": 0.10888671875, "loss_xval": 1.609375, "num_input_tokens_seen": 4432212, "step": 71 }, { "epoch": 0.23960066555740434, "grad_norm": 15.339340209960938, "learning_rate": 5e-06, "loss": 1.6562, "num_input_tokens_seen": 4495800, "step": 72 }, { "epoch": 0.23960066555740434, "loss": 1.5833992958068848, "loss_ce": 0.07705167680978775, "loss_iou": 0.3984375, "loss_num": 0.1416015625, "loss_xval": 1.5078125, "num_input_tokens_seen": 4495800, "step": 72 }, { "epoch": 0.24292845257903495, "grad_norm": 64.54541778564453, "learning_rate": 5e-06, "loss": 1.9285, "num_input_tokens_seen": 4558428, "step": 73 }, { "epoch": 0.24292845257903495, "loss": 1.8927369117736816, "loss_ce": 0.13980722427368164, "loss_iou": 0.5546875, "loss_num": 0.12890625, "loss_xval": 1.75, "num_input_tokens_seen": 4558428, "step": 73 }, { "epoch": 0.24625623960066556, "grad_norm": 32.903106689453125, "learning_rate": 5e-06, "loss": 2.1965, "num_input_tokens_seen": 4622312, "step": 74 }, { "epoch": 0.24625623960066556, "loss": 1.6801223754882812, "loss_ce": 0.08515171706676483, "loss_iou": 0.49609375, "loss_num": 0.12109375, "loss_xval": 1.59375, "num_input_tokens_seen": 4622312, "step": 74 }, { "epoch": 0.24958402662229617, "grad_norm": 18.268884658813477, "learning_rate": 5e-06, "loss": 1.696, "num_input_tokens_seen": 4685228, "step": 75 }, { "epoch": 0.24958402662229617, "loss": 2.0499777793884277, "loss_ce": 0.2775166630744934, "loss_iou": 0.494140625, "loss_num": 0.1572265625, "loss_xval": 1.7734375, "num_input_tokens_seen": 4685228, "step": 75 }, { "epoch": 0.2529118136439268, "grad_norm": 15.21312141418457, "learning_rate": 5e-06, "loss": 1.3746, "num_input_tokens_seen": 4746040, "step": 76 }, { "epoch": 0.2529118136439268, "loss": 1.2260797023773193, "loss_ce": 0.06128474697470665, "loss_iou": 0.248046875, "loss_num": 0.1337890625, "loss_xval": 1.1640625, "num_input_tokens_seen": 4746040, "step": 76 }, { "epoch": 0.2562396006655574, "grad_norm": 24.16434097290039, "learning_rate": 5e-06, "loss": 1.6829, "num_input_tokens_seen": 4809160, "step": 77 }, { "epoch": 0.2562396006655574, "loss": 1.6074845790863037, "loss_ce": 0.13531659543514252, "loss_iou": 0.46875, "loss_num": 0.1064453125, "loss_xval": 1.46875, "num_input_tokens_seen": 4809160, "step": 77 }, { "epoch": 0.259567387687188, "grad_norm": 36.885498046875, "learning_rate": 5e-06, "loss": 1.6743, "num_input_tokens_seen": 4872948, "step": 78 }, { "epoch": 0.259567387687188, "loss": 1.5818986892700195, "loss_ce": 0.03892991691827774, "loss_iou": 0.4453125, "loss_num": 0.130859375, "loss_xval": 1.546875, "num_input_tokens_seen": 4872948, "step": 78 }, { "epoch": 0.2628951747088186, "grad_norm": 12.761034965515137, "learning_rate": 5e-06, "loss": 1.3517, "num_input_tokens_seen": 4934300, "step": 79 }, { "epoch": 0.2628951747088186, "loss": 1.0315775871276855, "loss_ce": 0.014487742446362972, "loss_iou": 0.287109375, "loss_num": 0.0888671875, "loss_xval": 1.015625, "num_input_tokens_seen": 4934300, "step": 79 }, { "epoch": 0.26622296173044924, "grad_norm": 13.58120059967041, "learning_rate": 5e-06, "loss": 1.6854, "num_input_tokens_seen": 4997784, "step": 80 }, { "epoch": 0.26622296173044924, "loss": 1.634024739265442, "loss_ce": 0.04027477279305458, "loss_iou": 0.51953125, "loss_num": 0.111328125, "loss_xval": 1.59375, "num_input_tokens_seen": 4997784, "step": 80 }, { "epoch": 0.26955074875207985, "grad_norm": 24.133459091186523, "learning_rate": 5e-06, "loss": 1.4397, "num_input_tokens_seen": 5061392, "step": 81 }, { "epoch": 0.26955074875207985, "loss": 1.667598009109497, "loss_ce": 0.2418166995048523, "loss_iou": 0.384765625, "loss_num": 0.1318359375, "loss_xval": 1.421875, "num_input_tokens_seen": 5061392, "step": 81 }, { "epoch": 0.27287853577371046, "grad_norm": 15.399471282958984, "learning_rate": 5e-06, "loss": 1.3346, "num_input_tokens_seen": 5124660, "step": 82 }, { "epoch": 0.27287853577371046, "loss": 1.3418378829956055, "loss_ce": 0.09183788299560547, "loss_iou": 0.400390625, "loss_num": 0.08984375, "loss_xval": 1.25, "num_input_tokens_seen": 5124660, "step": 82 }, { "epoch": 0.2762063227953411, "grad_norm": 19.83667755126953, "learning_rate": 5e-06, "loss": 1.6332, "num_input_tokens_seen": 5186724, "step": 83 }, { "epoch": 0.2762063227953411, "loss": 1.8349742889404297, "loss_ce": 0.2241344451904297, "loss_iou": 0.447265625, "loss_num": 0.1435546875, "loss_xval": 1.609375, "num_input_tokens_seen": 5186724, "step": 83 }, { "epoch": 0.2795341098169717, "grad_norm": 15.304369926452637, "learning_rate": 5e-06, "loss": 1.4917, "num_input_tokens_seen": 5248960, "step": 84 }, { "epoch": 0.2795341098169717, "loss": 1.3170514106750488, "loss_ce": 0.10513728857040405, "loss_iou": 0.314453125, "loss_num": 0.11669921875, "loss_xval": 1.2109375, "num_input_tokens_seen": 5248960, "step": 84 }, { "epoch": 0.28286189683860236, "grad_norm": 30.498498916625977, "learning_rate": 5e-06, "loss": 1.7189, "num_input_tokens_seen": 5312544, "step": 85 }, { "epoch": 0.28286189683860236, "loss": 1.7351880073547363, "loss_ce": 0.04475831985473633, "loss_iou": 0.5, "loss_num": 0.1376953125, "loss_xval": 1.6875, "num_input_tokens_seen": 5312544, "step": 85 }, { "epoch": 0.28618968386023297, "grad_norm": 30.64577865600586, "learning_rate": 5e-06, "loss": 1.959, "num_input_tokens_seen": 5375812, "step": 86 }, { "epoch": 0.28618968386023297, "loss": 2.172987461090088, "loss_ce": 0.3106827139854431, "loss_iou": 0.578125, "loss_num": 0.140625, "loss_xval": 1.859375, "num_input_tokens_seen": 5375812, "step": 86 }, { "epoch": 0.2895174708818636, "grad_norm": 23.82610321044922, "learning_rate": 5e-06, "loss": 1.6742, "num_input_tokens_seen": 5438272, "step": 87 }, { "epoch": 0.2895174708818636, "loss": 1.6248981952667236, "loss_ce": 0.05556230992078781, "loss_iou": 0.39453125, "loss_num": 0.1572265625, "loss_xval": 1.5703125, "num_input_tokens_seen": 5438272, "step": 87 }, { "epoch": 0.2928452579034942, "grad_norm": 14.44840145111084, "learning_rate": 5e-06, "loss": 1.7714, "num_input_tokens_seen": 5498964, "step": 88 }, { "epoch": 0.2928452579034942, "loss": 1.6302241086959839, "loss_ce": 0.05844667926430702, "loss_iou": 0.359375, "loss_num": 0.1708984375, "loss_xval": 1.5703125, "num_input_tokens_seen": 5498964, "step": 88 }, { "epoch": 0.2961730449251248, "grad_norm": 26.83960723876953, "learning_rate": 5e-06, "loss": 1.6978, "num_input_tokens_seen": 5561644, "step": 89 }, { "epoch": 0.2961730449251248, "loss": 1.8088901042938232, "loss_ce": 0.06719096004962921, "loss_iou": 0.4921875, "loss_num": 0.15234375, "loss_xval": 1.7421875, "num_input_tokens_seen": 5561644, "step": 89 }, { "epoch": 0.2995008319467554, "grad_norm": 11.925620079040527, "learning_rate": 5e-06, "loss": 1.6314, "num_input_tokens_seen": 5624348, "step": 90 }, { "epoch": 0.2995008319467554, "loss": 1.4735398292541504, "loss_ce": 0.1751999855041504, "loss_iou": 0.41015625, "loss_num": 0.09619140625, "loss_xval": 1.296875, "num_input_tokens_seen": 5624348, "step": 90 }, { "epoch": 0.30282861896838603, "grad_norm": 11.06351089477539, "learning_rate": 5e-06, "loss": 1.2644, "num_input_tokens_seen": 5686716, "step": 91 }, { "epoch": 0.30282861896838603, "loss": 1.3133293390274048, "loss_ce": 0.03647388890385628, "loss_iou": 0.3671875, "loss_num": 0.10888671875, "loss_xval": 1.2734375, "num_input_tokens_seen": 5686716, "step": 91 }, { "epoch": 0.30615640599001664, "grad_norm": 18.150497436523438, "learning_rate": 5e-06, "loss": 1.4589, "num_input_tokens_seen": 5750560, "step": 92 }, { "epoch": 0.30615640599001664, "loss": 1.482915997505188, "loss_ce": 0.026373039931058884, "loss_iou": 0.494140625, "loss_num": 0.09326171875, "loss_xval": 1.453125, "num_input_tokens_seen": 5750560, "step": 92 }, { "epoch": 0.30948419301164726, "grad_norm": 31.141006469726562, "learning_rate": 5e-06, "loss": 1.662, "num_input_tokens_seen": 5813996, "step": 93 }, { "epoch": 0.30948419301164726, "loss": 1.7749927043914795, "loss_ce": 0.032805174589157104, "loss_iou": 0.57421875, "loss_num": 0.119140625, "loss_xval": 1.7421875, "num_input_tokens_seen": 5813996, "step": 93 }, { "epoch": 0.31281198003327787, "grad_norm": 20.244047164916992, "learning_rate": 5e-06, "loss": 1.3877, "num_input_tokens_seen": 5876956, "step": 94 }, { "epoch": 0.31281198003327787, "loss": 1.3969483375549316, "loss_ce": 0.058081258088350296, "loss_iou": 0.421875, "loss_num": 0.09912109375, "loss_xval": 1.3359375, "num_input_tokens_seen": 5876956, "step": 94 }, { "epoch": 0.3161397670549085, "grad_norm": 10.707723617553711, "learning_rate": 5e-06, "loss": 1.552, "num_input_tokens_seen": 5939564, "step": 95 }, { "epoch": 0.3161397670549085, "loss": 1.7702038288116455, "loss_ce": 0.031434379518032074, "loss_iou": 0.5390625, "loss_num": 0.1328125, "loss_xval": 1.7421875, "num_input_tokens_seen": 5939564, "step": 95 }, { "epoch": 0.3194675540765391, "grad_norm": 15.120357513427734, "learning_rate": 5e-06, "loss": 1.3944, "num_input_tokens_seen": 6000348, "step": 96 }, { "epoch": 0.3194675540765391, "loss": 1.3216090202331543, "loss_ce": 0.1492457389831543, "loss_iou": 0.2578125, "loss_num": 0.130859375, "loss_xval": 1.171875, "num_input_tokens_seen": 6000348, "step": 96 }, { "epoch": 0.3227953410981697, "grad_norm": 27.774568557739258, "learning_rate": 5e-06, "loss": 1.6287, "num_input_tokens_seen": 6063144, "step": 97 }, { "epoch": 0.3227953410981697, "loss": 1.5362926721572876, "loss_ce": 0.0611950121819973, "loss_iou": 0.400390625, "loss_num": 0.134765625, "loss_xval": 1.4765625, "num_input_tokens_seen": 6063144, "step": 97 }, { "epoch": 0.3261231281198003, "grad_norm": 23.571321487426758, "learning_rate": 5e-06, "loss": 1.501, "num_input_tokens_seen": 6125216, "step": 98 }, { "epoch": 0.3261231281198003, "loss": 1.4960654973983765, "loss_ce": 0.02975688874721527, "loss_iou": 0.400390625, "loss_num": 0.1328125, "loss_xval": 1.46875, "num_input_tokens_seen": 6125216, "step": 98 }, { "epoch": 0.32945091514143093, "grad_norm": 21.455690383911133, "learning_rate": 5e-06, "loss": 1.2035, "num_input_tokens_seen": 6186436, "step": 99 }, { "epoch": 0.32945091514143093, "loss": 1.533522367477417, "loss_ce": 0.024733252823352814, "loss_iou": 0.482421875, "loss_num": 0.10888671875, "loss_xval": 1.5078125, "num_input_tokens_seen": 6186436, "step": 99 }, { "epoch": 0.33277870216306155, "grad_norm": 19.567928314208984, "learning_rate": 5e-06, "loss": 1.6169, "num_input_tokens_seen": 6248292, "step": 100 }, { "epoch": 0.33277870216306155, "loss": 1.5605666637420654, "loss_ce": 0.15138691663742065, "loss_iou": 0.31640625, "loss_num": 0.1552734375, "loss_xval": 1.40625, "num_input_tokens_seen": 6248292, "step": 100 }, { "epoch": 0.33610648918469216, "grad_norm": 132.5515899658203, "learning_rate": 5e-06, "loss": 1.564, "num_input_tokens_seen": 6312152, "step": 101 }, { "epoch": 0.33610648918469216, "loss": 1.7496979236602783, "loss_ce": 0.02167057991027832, "loss_iou": 0.484375, "loss_num": 0.1513671875, "loss_xval": 1.7265625, "num_input_tokens_seen": 6312152, "step": 101 }, { "epoch": 0.33943427620632277, "grad_norm": 20.127302169799805, "learning_rate": 5e-06, "loss": 1.6141, "num_input_tokens_seen": 6374832, "step": 102 }, { "epoch": 0.33943427620632277, "loss": 1.5366733074188232, "loss_ce": 0.15434911847114563, "loss_iou": 0.4453125, "loss_num": 0.09912109375, "loss_xval": 1.3828125, "num_input_tokens_seen": 6374832, "step": 102 }, { "epoch": 0.3427620632279534, "grad_norm": 26.46497917175293, "learning_rate": 5e-06, "loss": 1.7083, "num_input_tokens_seen": 6439248, "step": 103 }, { "epoch": 0.3427620632279534, "loss": 1.5998561382293701, "loss_ce": 0.11841069906949997, "loss_iou": 0.447265625, "loss_num": 0.11767578125, "loss_xval": 1.484375, "num_input_tokens_seen": 6439248, "step": 103 }, { "epoch": 0.34608985024958405, "grad_norm": 22.090496063232422, "learning_rate": 5e-06, "loss": 1.5894, "num_input_tokens_seen": 6501596, "step": 104 }, { "epoch": 0.34608985024958405, "loss": 1.4435570240020752, "loss_ce": 0.05586168169975281, "loss_iou": 0.4453125, "loss_num": 0.09912109375, "loss_xval": 1.390625, "num_input_tokens_seen": 6501596, "step": 104 }, { "epoch": 0.34941763727121466, "grad_norm": 14.779273986816406, "learning_rate": 5e-06, "loss": 1.5203, "num_input_tokens_seen": 6562808, "step": 105 }, { "epoch": 0.34941763727121466, "loss": 1.5112072229385376, "loss_ce": 0.05271116644144058, "loss_iou": 0.376953125, "loss_num": 0.1416015625, "loss_xval": 1.4609375, "num_input_tokens_seen": 6562808, "step": 105 }, { "epoch": 0.3527454242928453, "grad_norm": 21.532625198364258, "learning_rate": 5e-06, "loss": 1.6474, "num_input_tokens_seen": 6626268, "step": 106 }, { "epoch": 0.3527454242928453, "loss": 1.6102540493011475, "loss_ce": 0.03603534772992134, "loss_iou": 0.5, "loss_num": 0.115234375, "loss_xval": 1.578125, "num_input_tokens_seen": 6626268, "step": 106 }, { "epoch": 0.3560732113144759, "grad_norm": 20.450246810913086, "learning_rate": 5e-06, "loss": 1.6417, "num_input_tokens_seen": 6688448, "step": 107 }, { "epoch": 0.3560732113144759, "loss": 1.5604519844055176, "loss_ce": 0.10928011685609818, "loss_iou": 0.44921875, "loss_num": 0.11083984375, "loss_xval": 1.453125, "num_input_tokens_seen": 6688448, "step": 107 }, { "epoch": 0.3594009983361065, "grad_norm": 11.704852104187012, "learning_rate": 5e-06, "loss": 1.4292, "num_input_tokens_seen": 6751076, "step": 108 }, { "epoch": 0.3594009983361065, "loss": 1.302997350692749, "loss_ce": 0.06276305764913559, "loss_iou": 0.375, "loss_num": 0.0986328125, "loss_xval": 1.2421875, "num_input_tokens_seen": 6751076, "step": 108 }, { "epoch": 0.3627287853577371, "grad_norm": 26.655656814575195, "learning_rate": 5e-06, "loss": 1.5021, "num_input_tokens_seen": 6814604, "step": 109 }, { "epoch": 0.3627287853577371, "loss": 1.4590511322021484, "loss_ce": 0.03766445070505142, "loss_iou": 0.39453125, "loss_num": 0.126953125, "loss_xval": 1.421875, "num_input_tokens_seen": 6814604, "step": 109 }, { "epoch": 0.36605657237936773, "grad_norm": 32.32645797729492, "learning_rate": 5e-06, "loss": 1.293, "num_input_tokens_seen": 6877660, "step": 110 }, { "epoch": 0.36605657237936773, "loss": 1.2947750091552734, "loss_ce": 0.05161091312766075, "loss_iou": 0.365234375, "loss_num": 0.1025390625, "loss_xval": 1.2421875, "num_input_tokens_seen": 6877660, "step": 110 }, { "epoch": 0.36938435940099834, "grad_norm": 19.443389892578125, "learning_rate": 5e-06, "loss": 1.4704, "num_input_tokens_seen": 6940376, "step": 111 }, { "epoch": 0.36938435940099834, "loss": 1.6598196029663086, "loss_ce": 0.016264785081148148, "loss_iou": 0.439453125, "loss_num": 0.15234375, "loss_xval": 1.640625, "num_input_tokens_seen": 6940376, "step": 111 }, { "epoch": 0.37271214642262895, "grad_norm": 17.155519485473633, "learning_rate": 5e-06, "loss": 1.468, "num_input_tokens_seen": 7002968, "step": 112 }, { "epoch": 0.37271214642262895, "loss": 1.4055347442626953, "loss_ce": 0.047136228531599045, "loss_iou": 0.32421875, "loss_num": 0.1416015625, "loss_xval": 1.359375, "num_input_tokens_seen": 7002968, "step": 112 }, { "epoch": 0.37603993344425957, "grad_norm": 233.18862915039062, "learning_rate": 5e-06, "loss": 1.7817, "num_input_tokens_seen": 7067164, "step": 113 }, { "epoch": 0.37603993344425957, "loss": 1.9062466621398926, "loss_ce": 0.01269207801669836, "loss_iou": 0.58984375, "loss_num": 0.142578125, "loss_xval": 1.890625, "num_input_tokens_seen": 7067164, "step": 113 }, { "epoch": 0.3793677204658902, "grad_norm": 35.83794403076172, "learning_rate": 5e-06, "loss": 1.7392, "num_input_tokens_seen": 7130484, "step": 114 }, { "epoch": 0.3793677204658902, "loss": 1.760888934135437, "loss_ce": 0.05971705913543701, "loss_iou": 0.494140625, "loss_num": 0.1416015625, "loss_xval": 1.703125, "num_input_tokens_seen": 7130484, "step": 114 }, { "epoch": 0.3826955074875208, "grad_norm": 17.227394104003906, "learning_rate": 5e-06, "loss": 1.4545, "num_input_tokens_seen": 7192824, "step": 115 }, { "epoch": 0.3826955074875208, "loss": 1.311098337173462, "loss_ce": 0.025453729555010796, "loss_iou": 0.380859375, "loss_num": 0.10498046875, "loss_xval": 1.2890625, "num_input_tokens_seen": 7192824, "step": 115 }, { "epoch": 0.3860232945091514, "grad_norm": 20.637577056884766, "learning_rate": 5e-06, "loss": 1.4128, "num_input_tokens_seen": 7254948, "step": 116 }, { "epoch": 0.3860232945091514, "loss": 1.4620569944381714, "loss_ce": 0.043355897068977356, "loss_iou": 0.376953125, "loss_num": 0.1328125, "loss_xval": 1.421875, "num_input_tokens_seen": 7254948, "step": 116 }, { "epoch": 0.389351081530782, "grad_norm": 32.69279479980469, "learning_rate": 5e-06, "loss": 1.7562, "num_input_tokens_seen": 7318796, "step": 117 }, { "epoch": 0.389351081530782, "loss": 1.807417392730713, "loss_ce": 0.037886105477809906, "loss_iou": 0.494140625, "loss_num": 0.15625, "loss_xval": 1.765625, "num_input_tokens_seen": 7318796, "step": 117 }, { "epoch": 0.39267886855241263, "grad_norm": 11.888574600219727, "learning_rate": 5e-06, "loss": 1.3916, "num_input_tokens_seen": 7379464, "step": 118 }, { "epoch": 0.39267886855241263, "loss": 1.490173101425171, "loss_ce": 0.04559309780597687, "loss_iou": 0.4140625, "loss_num": 0.12353515625, "loss_xval": 1.4453125, "num_input_tokens_seen": 7379464, "step": 118 }, { "epoch": 0.39600665557404324, "grad_norm": 21.6405029296875, "learning_rate": 5e-06, "loss": 1.2959, "num_input_tokens_seen": 7441932, "step": 119 }, { "epoch": 0.39600665557404324, "loss": 1.2409827709197998, "loss_ce": 0.017350006848573685, "loss_iou": 0.302734375, "loss_num": 0.12353515625, "loss_xval": 1.2265625, "num_input_tokens_seen": 7441932, "step": 119 }, { "epoch": 0.39933444259567386, "grad_norm": 196.78163146972656, "learning_rate": 5e-06, "loss": 1.7212, "num_input_tokens_seen": 7505232, "step": 120 }, { "epoch": 0.39933444259567386, "loss": 1.5443594455718994, "loss_ce": 0.028734492138028145, "loss_iou": 0.48828125, "loss_num": 0.107421875, "loss_xval": 1.515625, "num_input_tokens_seen": 7505232, "step": 120 }, { "epoch": 0.40266222961730447, "grad_norm": 118.65660858154297, "learning_rate": 5e-06, "loss": 1.3824, "num_input_tokens_seen": 7568056, "step": 121 }, { "epoch": 0.40266222961730447, "loss": 1.408996343612671, "loss_ce": 0.02081269398331642, "loss_iou": 0.365234375, "loss_num": 0.130859375, "loss_xval": 1.390625, "num_input_tokens_seen": 7568056, "step": 121 }, { "epoch": 0.4059900166389351, "grad_norm": 19.64975929260254, "learning_rate": 5e-06, "loss": 1.5991, "num_input_tokens_seen": 7632352, "step": 122 }, { "epoch": 0.4059900166389351, "loss": 1.42240571975708, "loss_ce": 0.056683123111724854, "loss_iou": 0.46484375, "loss_num": 0.0869140625, "loss_xval": 1.3671875, "num_input_tokens_seen": 7632352, "step": 122 }, { "epoch": 0.40931780366056575, "grad_norm": 47.080657958984375, "learning_rate": 5e-06, "loss": 1.4264, "num_input_tokens_seen": 7695112, "step": 123 }, { "epoch": 0.40931780366056575, "loss": 1.4007999897003174, "loss_ce": 0.023846877738833427, "loss_iou": 0.42578125, "loss_num": 0.10546875, "loss_xval": 1.375, "num_input_tokens_seen": 7695112, "step": 123 }, { "epoch": 0.41264559068219636, "grad_norm": 16.035123825073242, "learning_rate": 5e-06, "loss": 1.5467, "num_input_tokens_seen": 7758548, "step": 124 }, { "epoch": 0.41264559068219636, "loss": 1.532633900642395, "loss_ce": 0.059977658092975616, "loss_iou": 0.447265625, "loss_num": 0.115234375, "loss_xval": 1.46875, "num_input_tokens_seen": 7758548, "step": 124 }, { "epoch": 0.415973377703827, "grad_norm": 15.68152904510498, "learning_rate": 5e-06, "loss": 1.406, "num_input_tokens_seen": 7818644, "step": 125 }, { "epoch": 0.415973377703827, "loss": 1.4248697757720947, "loss_ce": 0.009464464150369167, "loss_iou": 0.388671875, "loss_num": 0.1279296875, "loss_xval": 1.4140625, "num_input_tokens_seen": 7818644, "step": 125 }, { "epoch": 0.4193011647254576, "grad_norm": 23.207124710083008, "learning_rate": 5e-06, "loss": 1.5815, "num_input_tokens_seen": 7881772, "step": 126 }, { "epoch": 0.4193011647254576, "loss": 1.6510601043701172, "loss_ce": 0.010435021482408047, "loss_iou": 0.458984375, "loss_num": 0.14453125, "loss_xval": 1.640625, "num_input_tokens_seen": 7881772, "step": 126 }, { "epoch": 0.4226289517470882, "grad_norm": 17.7788028717041, "learning_rate": 5e-06, "loss": 1.3724, "num_input_tokens_seen": 7943924, "step": 127 }, { "epoch": 0.4226289517470882, "loss": 1.4064964056015015, "loss_ce": 0.027834344655275345, "loss_iou": 0.359375, "loss_num": 0.1318359375, "loss_xval": 1.375, "num_input_tokens_seen": 7943924, "step": 127 }, { "epoch": 0.4259567387687188, "grad_norm": 22.62128448486328, "learning_rate": 5e-06, "loss": 1.6814, "num_input_tokens_seen": 8007756, "step": 128 }, { "epoch": 0.4259567387687188, "loss": 1.7109909057617188, "loss_ce": 0.05376438423991203, "loss_iou": 0.46484375, "loss_num": 0.1455078125, "loss_xval": 1.65625, "num_input_tokens_seen": 8007756, "step": 128 }, { "epoch": 0.4292845257903494, "grad_norm": 17.13036346435547, "learning_rate": 5e-06, "loss": 1.5, "num_input_tokens_seen": 8071624, "step": 129 }, { "epoch": 0.4292845257903494, "loss": 1.517619252204895, "loss_ce": 0.04252162203192711, "loss_iou": 0.46875, "loss_num": 0.10693359375, "loss_xval": 1.4765625, "num_input_tokens_seen": 8071624, "step": 129 }, { "epoch": 0.43261231281198004, "grad_norm": 28.38715934753418, "learning_rate": 5e-06, "loss": 1.6712, "num_input_tokens_seen": 8135204, "step": 130 }, { "epoch": 0.43261231281198004, "loss": 1.6535531282424927, "loss_ce": 0.016834398731589317, "loss_iou": 0.51171875, "loss_num": 0.123046875, "loss_xval": 1.640625, "num_input_tokens_seen": 8135204, "step": 130 }, { "epoch": 0.43594009983361065, "grad_norm": 21.765769958496094, "learning_rate": 5e-06, "loss": 1.2592, "num_input_tokens_seen": 8198328, "step": 131 }, { "epoch": 0.43594009983361065, "loss": 1.0689215660095215, "loss_ce": 0.04450753331184387, "loss_iou": 0.30859375, "loss_num": 0.08203125, "loss_xval": 1.0234375, "num_input_tokens_seen": 8198328, "step": 131 }, { "epoch": 0.43926788685524126, "grad_norm": 12.410709381103516, "learning_rate": 5e-06, "loss": 1.2945, "num_input_tokens_seen": 8260932, "step": 132 }, { "epoch": 0.43926788685524126, "loss": 1.2807693481445312, "loss_ce": 0.014656049199402332, "loss_iou": 0.32421875, "loss_num": 0.12353515625, "loss_xval": 1.265625, "num_input_tokens_seen": 8260932, "step": 132 }, { "epoch": 0.4425956738768719, "grad_norm": 24.125675201416016, "learning_rate": 5e-06, "loss": 1.4355, "num_input_tokens_seen": 8324024, "step": 133 }, { "epoch": 0.4425956738768719, "loss": 1.2214634418487549, "loss_ce": 0.035916537046432495, "loss_iou": 0.28125, "loss_num": 0.12451171875, "loss_xval": 1.1875, "num_input_tokens_seen": 8324024, "step": 133 }, { "epoch": 0.4459234608985025, "grad_norm": 249.4659423828125, "learning_rate": 5e-06, "loss": 1.2783, "num_input_tokens_seen": 8386732, "step": 134 }, { "epoch": 0.4459234608985025, "loss": 1.165820837020874, "loss_ce": 0.022754406556487083, "loss_iou": 0.34765625, "loss_num": 0.08935546875, "loss_xval": 1.140625, "num_input_tokens_seen": 8386732, "step": 134 }, { "epoch": 0.4492512479201331, "grad_norm": 15.685258865356445, "learning_rate": 5e-06, "loss": 1.6974, "num_input_tokens_seen": 8449704, "step": 135 }, { "epoch": 0.4492512479201331, "loss": 1.7366528511047363, "loss_ce": 0.01448477990925312, "loss_iou": 0.515625, "loss_num": 0.1376953125, "loss_xval": 1.71875, "num_input_tokens_seen": 8449704, "step": 135 }, { "epoch": 0.4525790349417637, "grad_norm": 15.560267448425293, "learning_rate": 5e-06, "loss": 1.3367, "num_input_tokens_seen": 8511980, "step": 136 }, { "epoch": 0.4525790349417637, "loss": 1.5319541692733765, "loss_ce": 0.06418071687221527, "loss_iou": 0.50390625, "loss_num": 0.091796875, "loss_xval": 1.46875, "num_input_tokens_seen": 8511980, "step": 136 }, { "epoch": 0.4559068219633943, "grad_norm": 31.106922149658203, "learning_rate": 5e-06, "loss": 1.3191, "num_input_tokens_seen": 8575108, "step": 137 }, { "epoch": 0.4559068219633943, "loss": 1.1032752990722656, "loss_ce": 0.01050189882516861, "loss_iou": 0.2734375, "loss_num": 0.10888671875, "loss_xval": 1.09375, "num_input_tokens_seen": 8575108, "step": 137 }, { "epoch": 0.45923460898502494, "grad_norm": 19.03159523010254, "learning_rate": 5e-06, "loss": 1.51, "num_input_tokens_seen": 8637260, "step": 138 }, { "epoch": 0.45923460898502494, "loss": 1.4713654518127441, "loss_ce": 0.05046709254384041, "loss_iou": 0.416015625, "loss_num": 0.1171875, "loss_xval": 1.421875, "num_input_tokens_seen": 8637260, "step": 138 }, { "epoch": 0.46256239600665555, "grad_norm": 18.39104652404785, "learning_rate": 5e-06, "loss": 1.5178, "num_input_tokens_seen": 8699912, "step": 139 }, { "epoch": 0.46256239600665555, "loss": 1.5592514276504517, "loss_ce": 0.04411466047167778, "loss_iou": 0.49609375, "loss_num": 0.1044921875, "loss_xval": 1.515625, "num_input_tokens_seen": 8699912, "step": 139 }, { "epoch": 0.46589018302828616, "grad_norm": 16.95204734802246, "learning_rate": 5e-06, "loss": 1.4614, "num_input_tokens_seen": 8763456, "step": 140 }, { "epoch": 0.46589018302828616, "loss": 1.5307507514953613, "loss_ce": 0.026844505220651627, "loss_iou": 0.419921875, "loss_num": 0.1328125, "loss_xval": 1.5, "num_input_tokens_seen": 8763456, "step": 140 }, { "epoch": 0.46921797004991683, "grad_norm": 13.439885139465332, "learning_rate": 5e-06, "loss": 1.2669, "num_input_tokens_seen": 8823008, "step": 141 }, { "epoch": 0.46921797004991683, "loss": 1.1225981712341309, "loss_ce": 0.03861372545361519, "loss_iou": 0.2294921875, "loss_num": 0.125, "loss_xval": 1.0859375, "num_input_tokens_seen": 8823008, "step": 141 }, { "epoch": 0.47254575707154745, "grad_norm": 13.823068618774414, "learning_rate": 5e-06, "loss": 1.3294, "num_input_tokens_seen": 8885756, "step": 142 }, { "epoch": 0.47254575707154745, "loss": 1.3653852939605713, "loss_ce": 0.011625496670603752, "loss_iou": 0.3671875, "loss_num": 0.1240234375, "loss_xval": 1.3515625, "num_input_tokens_seen": 8885756, "step": 142 }, { "epoch": 0.47587354409317806, "grad_norm": 46.61298751831055, "learning_rate": 5e-06, "loss": 1.6217, "num_input_tokens_seen": 8949676, "step": 143 }, { "epoch": 0.47587354409317806, "loss": 1.6690086126327515, "loss_ce": 0.08209459483623505, "loss_iou": 0.4453125, "loss_num": 0.1396484375, "loss_xval": 1.5859375, "num_input_tokens_seen": 8949676, "step": 143 }, { "epoch": 0.47920133111480867, "grad_norm": 12.167203903198242, "learning_rate": 5e-06, "loss": 1.1975, "num_input_tokens_seen": 9012212, "step": 144 }, { "epoch": 0.47920133111480867, "loss": 1.0818878412246704, "loss_ce": 0.06528623402118683, "loss_iou": 0.302734375, "loss_num": 0.08251953125, "loss_xval": 1.015625, "num_input_tokens_seen": 9012212, "step": 144 }, { "epoch": 0.4825291181364393, "grad_norm": 19.003299713134766, "learning_rate": 5e-06, "loss": 1.3821, "num_input_tokens_seen": 9075056, "step": 145 }, { "epoch": 0.4825291181364393, "loss": 1.3460979461669922, "loss_ce": 0.01919359713792801, "loss_iou": 0.404296875, "loss_num": 0.103515625, "loss_xval": 1.328125, "num_input_tokens_seen": 9075056, "step": 145 }, { "epoch": 0.4858569051580699, "grad_norm": 30.158510208129883, "learning_rate": 5e-06, "loss": 1.3432, "num_input_tokens_seen": 9138200, "step": 146 }, { "epoch": 0.4858569051580699, "loss": 1.3490657806396484, "loss_ce": 0.0026301806792616844, "loss_iou": 0.4453125, "loss_num": 0.09130859375, "loss_xval": 1.34375, "num_input_tokens_seen": 9138200, "step": 146 }, { "epoch": 0.4891846921797005, "grad_norm": 13.28848648071289, "learning_rate": 5e-06, "loss": 1.2648, "num_input_tokens_seen": 9201716, "step": 147 }, { "epoch": 0.4891846921797005, "loss": 1.31808340549469, "loss_ce": 0.007536512799561024, "loss_iou": 0.400390625, "loss_num": 0.1015625, "loss_xval": 1.3125, "num_input_tokens_seen": 9201716, "step": 147 }, { "epoch": 0.4925124792013311, "grad_norm": 22.02879524230957, "learning_rate": 5e-06, "loss": 1.2787, "num_input_tokens_seen": 9264876, "step": 148 }, { "epoch": 0.4925124792013311, "loss": 1.3372578620910645, "loss_ce": 0.014503922313451767, "loss_iou": 0.388671875, "loss_num": 0.10888671875, "loss_xval": 1.3203125, "num_input_tokens_seen": 9264876, "step": 148 }, { "epoch": 0.49584026622296173, "grad_norm": 15.431622505187988, "learning_rate": 5e-06, "loss": 1.4477, "num_input_tokens_seen": 9327456, "step": 149 }, { "epoch": 0.49584026622296173, "loss": 1.287103295326233, "loss_ce": 0.03173219412565231, "loss_iou": 0.38671875, "loss_num": 0.09619140625, "loss_xval": 1.2578125, "num_input_tokens_seen": 9327456, "step": 149 }, { "epoch": 0.49916805324459235, "grad_norm": 14.655500411987305, "learning_rate": 5e-06, "loss": 1.5951, "num_input_tokens_seen": 9391828, "step": 150 }, { "epoch": 0.49916805324459235, "loss": 1.5570836067199707, "loss_ce": 0.03169288486242294, "loss_iou": 0.46875, "loss_num": 0.11767578125, "loss_xval": 1.5234375, "num_input_tokens_seen": 9391828, "step": 150 }, { "epoch": 0.502495840266223, "grad_norm": 12.9921236038208, "learning_rate": 5e-06, "loss": 1.1374, "num_input_tokens_seen": 9453992, "step": 151 }, { "epoch": 0.502495840266223, "loss": 1.1639986038208008, "loss_ce": 0.00481888884678483, "loss_iou": 0.2578125, "loss_num": 0.12890625, "loss_xval": 1.15625, "num_input_tokens_seen": 9453992, "step": 151 }, { "epoch": 0.5058236272878536, "grad_norm": 12.746012687683105, "learning_rate": 5e-06, "loss": 1.4932, "num_input_tokens_seen": 9515028, "step": 152 }, { "epoch": 0.5058236272878536, "loss": 1.3379508256912231, "loss_ce": 0.004943011794239283, "loss_iou": 0.43359375, "loss_num": 0.0927734375, "loss_xval": 1.3359375, "num_input_tokens_seen": 9515028, "step": 152 }, { "epoch": 0.5091514143094842, "grad_norm": 12.805912017822266, "learning_rate": 5e-06, "loss": 1.3026, "num_input_tokens_seen": 9576904, "step": 153 }, { "epoch": 0.5091514143094842, "loss": 1.1774930953979492, "loss_ce": 0.010500917211174965, "loss_iou": 0.294921875, "loss_num": 0.1162109375, "loss_xval": 1.1640625, "num_input_tokens_seen": 9576904, "step": 153 }, { "epoch": 0.5124792013311148, "grad_norm": 11.221924781799316, "learning_rate": 5e-06, "loss": 1.4352, "num_input_tokens_seen": 9640896, "step": 154 }, { "epoch": 0.5124792013311148, "loss": 1.4379901885986328, "loss_ce": 0.014650269411504269, "loss_iou": 0.474609375, "loss_num": 0.09521484375, "loss_xval": 1.421875, "num_input_tokens_seen": 9640896, "step": 154 }, { "epoch": 0.5158069883527454, "grad_norm": 29.179161071777344, "learning_rate": 5e-06, "loss": 1.4006, "num_input_tokens_seen": 9704416, "step": 155 }, { "epoch": 0.5158069883527454, "loss": 1.2948763370513916, "loss_ce": 0.02729811705648899, "loss_iou": 0.361328125, "loss_num": 0.109375, "loss_xval": 1.265625, "num_input_tokens_seen": 9704416, "step": 155 }, { "epoch": 0.519134775374376, "grad_norm": 30.093252182006836, "learning_rate": 5e-06, "loss": 1.5561, "num_input_tokens_seen": 9767272, "step": 156 }, { "epoch": 0.519134775374376, "loss": 1.4946575164794922, "loss_ce": 0.012723930180072784, "loss_iou": 0.46875, "loss_num": 0.1083984375, "loss_xval": 1.484375, "num_input_tokens_seen": 9767272, "step": 156 }, { "epoch": 0.5224625623960066, "grad_norm": 14.34788990020752, "learning_rate": 5e-06, "loss": 1.4241, "num_input_tokens_seen": 9829412, "step": 157 }, { "epoch": 0.5224625623960066, "loss": 1.4533286094665527, "loss_ce": 0.002156792674213648, "loss_iou": 0.451171875, "loss_num": 0.109375, "loss_xval": 1.453125, "num_input_tokens_seen": 9829412, "step": 157 }, { "epoch": 0.5257903494176372, "grad_norm": 45.826786041259766, "learning_rate": 5e-06, "loss": 1.1328, "num_input_tokens_seen": 9891184, "step": 158 }, { "epoch": 0.5257903494176372, "loss": 1.2606561183929443, "loss_ce": 0.008825048804283142, "loss_iou": 0.4375, "loss_num": 0.07568359375, "loss_xval": 1.25, "num_input_tokens_seen": 9891184, "step": 158 }, { "epoch": 0.5291181364392679, "grad_norm": 14.152484893798828, "learning_rate": 5e-06, "loss": 1.2858, "num_input_tokens_seen": 9952284, "step": 159 }, { "epoch": 0.5291181364392679, "loss": 1.2074310779571533, "loss_ce": 0.024569693952798843, "loss_iou": 0.330078125, "loss_num": 0.1044921875, "loss_xval": 1.1796875, "num_input_tokens_seen": 9952284, "step": 159 }, { "epoch": 0.5324459234608985, "grad_norm": 37.07005310058594, "learning_rate": 5e-06, "loss": 1.5076, "num_input_tokens_seen": 10014964, "step": 160 }, { "epoch": 0.5324459234608985, "loss": 1.5249152183532715, "loss_ce": 0.008313634432852268, "loss_iou": 0.43359375, "loss_num": 0.1298828125, "loss_xval": 1.515625, "num_input_tokens_seen": 10014964, "step": 160 }, { "epoch": 0.5357737104825291, "grad_norm": 17.514577865600586, "learning_rate": 5e-06, "loss": 1.2458, "num_input_tokens_seen": 10078884, "step": 161 }, { "epoch": 0.5357737104825291, "loss": 0.9992653727531433, "loss_ce": 0.006589556112885475, "loss_iou": 0.27734375, "loss_num": 0.08740234375, "loss_xval": 0.9921875, "num_input_tokens_seen": 10078884, "step": 161 }, { "epoch": 0.5391014975041597, "grad_norm": 19.246084213256836, "learning_rate": 5e-06, "loss": 1.3381, "num_input_tokens_seen": 10141460, "step": 162 }, { "epoch": 0.5391014975041597, "loss": 1.0187444686889648, "loss_ce": 0.012152590788900852, "loss_iou": 0.24609375, "loss_num": 0.10302734375, "loss_xval": 1.0078125, "num_input_tokens_seen": 10141460, "step": 162 }, { "epoch": 0.5424292845257903, "grad_norm": 14.021342277526855, "learning_rate": 5e-06, "loss": 1.2232, "num_input_tokens_seen": 10203772, "step": 163 }, { "epoch": 0.5424292845257903, "loss": 1.3046138286590576, "loss_ce": 0.004809187725186348, "loss_iou": 0.40234375, "loss_num": 0.0986328125, "loss_xval": 1.296875, "num_input_tokens_seen": 10203772, "step": 163 }, { "epoch": 0.5457570715474209, "grad_norm": 20.620309829711914, "learning_rate": 5e-06, "loss": 1.3921, "num_input_tokens_seen": 10266640, "step": 164 }, { "epoch": 0.5457570715474209, "loss": 1.3793249130249023, "loss_ce": 0.012625731527805328, "loss_iou": 0.478515625, "loss_num": 0.08154296875, "loss_xval": 1.3671875, "num_input_tokens_seen": 10266640, "step": 164 }, { "epoch": 0.5490848585690515, "grad_norm": 27.303794860839844, "learning_rate": 5e-06, "loss": 1.3677, "num_input_tokens_seen": 10330220, "step": 165 }, { "epoch": 0.5490848585690515, "loss": 1.2398979663848877, "loss_ce": 0.002593299839645624, "loss_iou": 0.416015625, "loss_num": 0.0810546875, "loss_xval": 1.234375, "num_input_tokens_seen": 10330220, "step": 165 }, { "epoch": 0.5524126455906821, "grad_norm": 15.321734428405762, "learning_rate": 5e-06, "loss": 1.3985, "num_input_tokens_seen": 10392312, "step": 166 }, { "epoch": 0.5524126455906821, "loss": 1.1850658655166626, "loss_ce": 0.030891068279743195, "loss_iou": 0.279296875, "loss_num": 0.119140625, "loss_xval": 1.15625, "num_input_tokens_seen": 10392312, "step": 166 }, { "epoch": 0.5557404326123128, "grad_norm": 48.359825134277344, "learning_rate": 5e-06, "loss": 1.1362, "num_input_tokens_seen": 10455024, "step": 167 }, { "epoch": 0.5557404326123128, "loss": 1.3550812005996704, "loss_ce": 0.0035187224857509136, "loss_iou": 0.419921875, "loss_num": 0.1025390625, "loss_xval": 1.3515625, "num_input_tokens_seen": 10455024, "step": 167 }, { "epoch": 0.5590682196339434, "grad_norm": 31.070018768310547, "learning_rate": 5e-06, "loss": 1.0637, "num_input_tokens_seen": 10516804, "step": 168 }, { "epoch": 0.5590682196339434, "loss": 0.8580918312072754, "loss_ce": 0.006285225972533226, "loss_iou": 0.1767578125, "loss_num": 0.099609375, "loss_xval": 0.8515625, "num_input_tokens_seen": 10516804, "step": 168 }, { "epoch": 0.562396006655574, "grad_norm": 13.717340469360352, "learning_rate": 5e-06, "loss": 1.3105, "num_input_tokens_seen": 10579816, "step": 169 }, { "epoch": 0.562396006655574, "loss": 1.291137933731079, "loss_ce": 0.017700420692563057, "loss_iou": 0.40625, "loss_num": 0.09228515625, "loss_xval": 1.2734375, "num_input_tokens_seen": 10579816, "step": 169 }, { "epoch": 0.5657237936772047, "grad_norm": 14.410500526428223, "learning_rate": 5e-06, "loss": 1.1444, "num_input_tokens_seen": 10640804, "step": 170 }, { "epoch": 0.5657237936772047, "loss": 1.0607528686523438, "loss_ce": 0.012901253998279572, "loss_iou": 0.2578125, "loss_num": 0.1064453125, "loss_xval": 1.046875, "num_input_tokens_seen": 10640804, "step": 170 }, { "epoch": 0.5690515806988353, "grad_norm": 21.776578903198242, "learning_rate": 5e-06, "loss": 1.5343, "num_input_tokens_seen": 10705984, "step": 171 }, { "epoch": 0.5690515806988353, "loss": 1.5333166122436523, "loss_ce": 0.013785396702587605, "loss_iou": 0.54296875, "loss_num": 0.0869140625, "loss_xval": 1.515625, "num_input_tokens_seen": 10705984, "step": 171 }, { "epoch": 0.5723793677204659, "grad_norm": 20.108116149902344, "learning_rate": 5e-06, "loss": 1.4882, "num_input_tokens_seen": 10768956, "step": 172 }, { "epoch": 0.5723793677204659, "loss": 1.473107099533081, "loss_ce": 0.015587646514177322, "loss_iou": 0.482421875, "loss_num": 0.0986328125, "loss_xval": 1.4609375, "num_input_tokens_seen": 10768956, "step": 172 }, { "epoch": 0.5757071547420965, "grad_norm": 60.56779479980469, "learning_rate": 5e-06, "loss": 1.6353, "num_input_tokens_seen": 10832704, "step": 173 }, { "epoch": 0.5757071547420965, "loss": 1.7748465538024902, "loss_ce": 0.0072684986516833305, "loss_iou": 0.5234375, "loss_num": 0.14453125, "loss_xval": 1.765625, "num_input_tokens_seen": 10832704, "step": 173 }, { "epoch": 0.5790349417637272, "grad_norm": 19.885292053222656, "learning_rate": 5e-06, "loss": 1.3888, "num_input_tokens_seen": 10895876, "step": 174 }, { "epoch": 0.5790349417637272, "loss": 1.2018749713897705, "loss_ce": 0.0016796982381492853, "loss_iou": 0.365234375, "loss_num": 0.09423828125, "loss_xval": 1.203125, "num_input_tokens_seen": 10895876, "step": 174 }, { "epoch": 0.5823627287853578, "grad_norm": 32.432376861572266, "learning_rate": 5e-06, "loss": 1.2947, "num_input_tokens_seen": 10957804, "step": 175 }, { "epoch": 0.5823627287853578, "loss": 1.3988152742385864, "loss_ce": 0.003551559057086706, "loss_iou": 0.42578125, "loss_num": 0.1083984375, "loss_xval": 1.3984375, "num_input_tokens_seen": 10957804, "step": 175 }, { "epoch": 0.5856905158069884, "grad_norm": 81.50908660888672, "learning_rate": 5e-06, "loss": 1.1717, "num_input_tokens_seen": 11017420, "step": 176 }, { "epoch": 0.5856905158069884, "loss": 1.09592866897583, "loss_ce": 0.021709948778152466, "loss_iou": 0.189453125, "loss_num": 0.138671875, "loss_xval": 1.078125, "num_input_tokens_seen": 11017420, "step": 176 }, { "epoch": 0.589018302828619, "grad_norm": 14.417983055114746, "learning_rate": 5e-06, "loss": 1.5833, "num_input_tokens_seen": 11080940, "step": 177 }, { "epoch": 0.589018302828619, "loss": 1.7335617542266846, "loss_ce": 0.020182903856039047, "loss_iou": 0.5234375, "loss_num": 0.1337890625, "loss_xval": 1.7109375, "num_input_tokens_seen": 11080940, "step": 177 }, { "epoch": 0.5923460898502496, "grad_norm": 67.00255584716797, "learning_rate": 5e-06, "loss": 1.3181, "num_input_tokens_seen": 11144980, "step": 178 }, { "epoch": 0.5923460898502496, "loss": 1.448054313659668, "loss_ce": 0.03496834263205528, "loss_iou": 0.462890625, "loss_num": 0.09716796875, "loss_xval": 1.4140625, "num_input_tokens_seen": 11144980, "step": 178 }, { "epoch": 0.5956738768718802, "grad_norm": 17.97549057006836, "learning_rate": 5e-06, "loss": 1.3473, "num_input_tokens_seen": 11208712, "step": 179 }, { "epoch": 0.5956738768718802, "loss": 1.2413945198059082, "loss_ce": 0.0031132774893194437, "loss_iou": 0.408203125, "loss_num": 0.08447265625, "loss_xval": 1.234375, "num_input_tokens_seen": 11208712, "step": 179 }, { "epoch": 0.5990016638935108, "grad_norm": 23.58061408996582, "learning_rate": 5e-06, "loss": 1.3682, "num_input_tokens_seen": 11271028, "step": 180 }, { "epoch": 0.5990016638935108, "loss": 1.1757802963256836, "loss_ce": 0.0268545038998127, "loss_iou": 0.330078125, "loss_num": 0.09814453125, "loss_xval": 1.1484375, "num_input_tokens_seen": 11271028, "step": 180 }, { "epoch": 0.6023294509151415, "grad_norm": 15.865267753601074, "learning_rate": 5e-06, "loss": 1.1611, "num_input_tokens_seen": 11332560, "step": 181 }, { "epoch": 0.6023294509151415, "loss": 1.332716941833496, "loss_ce": 0.01826382614672184, "loss_iou": 0.376953125, "loss_num": 0.1123046875, "loss_xval": 1.3125, "num_input_tokens_seen": 11332560, "step": 181 }, { "epoch": 0.6056572379367721, "grad_norm": 14.636528968811035, "learning_rate": 5e-06, "loss": 1.4119, "num_input_tokens_seen": 11394592, "step": 182 }, { "epoch": 0.6056572379367721, "loss": 1.3619801998138428, "loss_ce": 0.004070098511874676, "loss_iou": 0.421875, "loss_num": 0.1025390625, "loss_xval": 1.359375, "num_input_tokens_seen": 11394592, "step": 182 }, { "epoch": 0.6089850249584027, "grad_norm": 14.351191520690918, "learning_rate": 5e-06, "loss": 1.3834, "num_input_tokens_seen": 11457912, "step": 183 }, { "epoch": 0.6089850249584027, "loss": 1.633519172668457, "loss_ce": 0.019261367619037628, "loss_iou": 0.55078125, "loss_num": 0.10302734375, "loss_xval": 1.6171875, "num_input_tokens_seen": 11457912, "step": 183 }, { "epoch": 0.6123128119800333, "grad_norm": 16.637283325195312, "learning_rate": 5e-06, "loss": 1.23, "num_input_tokens_seen": 11521492, "step": 184 }, { "epoch": 0.6123128119800333, "loss": 1.2156696319580078, "loss_ce": 0.05990792065858841, "loss_iou": 0.3359375, "loss_num": 0.09716796875, "loss_xval": 1.15625, "num_input_tokens_seen": 11521492, "step": 184 }, { "epoch": 0.6156405990016639, "grad_norm": 34.948036193847656, "learning_rate": 5e-06, "loss": 1.5363, "num_input_tokens_seen": 11584208, "step": 185 }, { "epoch": 0.6156405990016639, "loss": 1.6890095472335815, "loss_ce": 0.034224435687065125, "loss_iou": 0.5859375, "loss_num": 0.09619140625, "loss_xval": 1.65625, "num_input_tokens_seen": 11584208, "step": 185 }, { "epoch": 0.6189683860232945, "grad_norm": 14.760610580444336, "learning_rate": 5e-06, "loss": 1.129, "num_input_tokens_seen": 11643560, "step": 186 }, { "epoch": 0.6189683860232945, "loss": 1.1673153638839722, "loss_ce": 0.0008114665979519486, "loss_iou": 0.2734375, "loss_num": 0.1240234375, "loss_xval": 1.1640625, "num_input_tokens_seen": 11643560, "step": 186 }, { "epoch": 0.6222961730449251, "grad_norm": 16.94452667236328, "learning_rate": 5e-06, "loss": 1.2258, "num_input_tokens_seen": 11705768, "step": 187 }, { "epoch": 0.6222961730449251, "loss": 1.3460886478424072, "loss_ce": 0.0033152345567941666, "loss_iou": 0.4375, "loss_num": 0.09326171875, "loss_xval": 1.34375, "num_input_tokens_seen": 11705768, "step": 187 }, { "epoch": 0.6256239600665557, "grad_norm": 11.506011009216309, "learning_rate": 5e-06, "loss": 1.3508, "num_input_tokens_seen": 11766764, "step": 188 }, { "epoch": 0.6256239600665557, "loss": 1.2646206617355347, "loss_ce": 0.04477202519774437, "loss_iou": 0.337890625, "loss_num": 0.10888671875, "loss_xval": 1.21875, "num_input_tokens_seen": 11766764, "step": 188 }, { "epoch": 0.6289517470881864, "grad_norm": 13.971025466918945, "learning_rate": 5e-06, "loss": 1.395, "num_input_tokens_seen": 11829848, "step": 189 }, { "epoch": 0.6289517470881864, "loss": 1.7523114681243896, "loss_ce": 0.011588791385293007, "loss_iou": 0.5390625, "loss_num": 0.1328125, "loss_xval": 1.7421875, "num_input_tokens_seen": 11829848, "step": 189 }, { "epoch": 0.632279534109817, "grad_norm": 34.798828125, "learning_rate": 5e-06, "loss": 1.3202, "num_input_tokens_seen": 11893144, "step": 190 }, { "epoch": 0.632279534109817, "loss": 1.4185304641723633, "loss_ce": 0.0669679343700409, "loss_iou": 0.369140625, "loss_num": 0.123046875, "loss_xval": 1.3515625, "num_input_tokens_seen": 11893144, "step": 190 }, { "epoch": 0.6356073211314476, "grad_norm": 23.807941436767578, "learning_rate": 5e-06, "loss": 1.3981, "num_input_tokens_seen": 11956764, "step": 191 }, { "epoch": 0.6356073211314476, "loss": 1.4876669645309448, "loss_ce": 0.012813407927751541, "loss_iou": 0.46484375, "loss_num": 0.10888671875, "loss_xval": 1.4765625, "num_input_tokens_seen": 11956764, "step": 191 }, { "epoch": 0.6389351081530782, "grad_norm": 10.772053718566895, "learning_rate": 5e-06, "loss": 1.3734, "num_input_tokens_seen": 12020672, "step": 192 }, { "epoch": 0.6389351081530782, "loss": 1.2527387142181396, "loss_ce": 0.014945641160011292, "loss_iou": 0.40234375, "loss_num": 0.08642578125, "loss_xval": 1.234375, "num_input_tokens_seen": 12020672, "step": 192 }, { "epoch": 0.6422628951747088, "grad_norm": 16.477436065673828, "learning_rate": 5e-06, "loss": 1.3591, "num_input_tokens_seen": 12083392, "step": 193 }, { "epoch": 0.6422628951747088, "loss": 1.431098461151123, "loss_ce": 0.002875822363421321, "loss_iou": 0.404296875, "loss_num": 0.12451171875, "loss_xval": 1.4296875, "num_input_tokens_seen": 12083392, "step": 193 }, { "epoch": 0.6455906821963394, "grad_norm": 12.675455093383789, "learning_rate": 5e-06, "loss": 1.3098, "num_input_tokens_seen": 12146444, "step": 194 }, { "epoch": 0.6455906821963394, "loss": 1.375436782836914, "loss_ce": 0.015329442918300629, "loss_iou": 0.392578125, "loss_num": 0.115234375, "loss_xval": 1.359375, "num_input_tokens_seen": 12146444, "step": 194 }, { "epoch": 0.64891846921797, "grad_norm": 15.562612533569336, "learning_rate": 5e-06, "loss": 1.0385, "num_input_tokens_seen": 12208136, "step": 195 }, { "epoch": 0.64891846921797, "loss": 1.0402494668960571, "loss_ce": 0.0009428322082385421, "loss_iou": 0.314453125, "loss_num": 0.08203125, "loss_xval": 1.0390625, "num_input_tokens_seen": 12208136, "step": 195 }, { "epoch": 0.6522462562396006, "grad_norm": 28.349838256835938, "learning_rate": 5e-06, "loss": 1.197, "num_input_tokens_seen": 12270244, "step": 196 }, { "epoch": 0.6522462562396006, "loss": 1.0950136184692383, "loss_ce": 0.006146472413092852, "loss_iou": 0.345703125, "loss_num": 0.07958984375, "loss_xval": 1.0859375, "num_input_tokens_seen": 12270244, "step": 196 }, { "epoch": 0.6555740432612313, "grad_norm": 16.476137161254883, "learning_rate": 5e-06, "loss": 1.3331, "num_input_tokens_seen": 12333024, "step": 197 }, { "epoch": 0.6555740432612313, "loss": 1.3048908710479736, "loss_ce": 0.005574405658990145, "loss_iou": 0.34375, "loss_num": 0.1220703125, "loss_xval": 1.296875, "num_input_tokens_seen": 12333024, "step": 197 }, { "epoch": 0.6589018302828619, "grad_norm": 13.58584976196289, "learning_rate": 5e-06, "loss": 1.2588, "num_input_tokens_seen": 12396340, "step": 198 }, { "epoch": 0.6589018302828619, "loss": 1.4304707050323486, "loss_ce": 0.005665954202413559, "loss_iou": 0.42578125, "loss_num": 0.115234375, "loss_xval": 1.421875, "num_input_tokens_seen": 12396340, "step": 198 }, { "epoch": 0.6622296173044925, "grad_norm": 38.477630615234375, "learning_rate": 5e-06, "loss": 1.3283, "num_input_tokens_seen": 12459396, "step": 199 }, { "epoch": 0.6622296173044925, "loss": 1.2707278728485107, "loss_ce": 0.000708332285284996, "loss_iou": 0.337890625, "loss_num": 0.11865234375, "loss_xval": 1.2734375, "num_input_tokens_seen": 12459396, "step": 199 }, { "epoch": 0.6655574043261231, "grad_norm": 30.41405487060547, "learning_rate": 5e-06, "loss": 1.1544, "num_input_tokens_seen": 12522324, "step": 200 }, { "epoch": 0.6655574043261231, "loss": 0.9297307133674622, "loss_ce": 0.007001231890171766, "loss_iou": 0.2109375, "loss_num": 0.10009765625, "loss_xval": 0.921875, "num_input_tokens_seen": 12522324, "step": 200 }, { "epoch": 0.6688851913477537, "grad_norm": 32.16323471069336, "learning_rate": 5e-06, "loss": 1.4879, "num_input_tokens_seen": 12585900, "step": 201 }, { "epoch": 0.6688851913477537, "loss": 1.5877773761749268, "loss_ce": 0.02771873027086258, "loss_iou": 0.427734375, "loss_num": 0.140625, "loss_xval": 1.5625, "num_input_tokens_seen": 12585900, "step": 201 }, { "epoch": 0.6722129783693843, "grad_norm": 19.184961318969727, "learning_rate": 5e-06, "loss": 1.3649, "num_input_tokens_seen": 12647776, "step": 202 }, { "epoch": 0.6722129783693843, "loss": 1.634108066558838, "loss_ce": 0.015455802902579308, "loss_iou": 0.515625, "loss_num": 0.11767578125, "loss_xval": 1.6171875, "num_input_tokens_seen": 12647776, "step": 202 }, { "epoch": 0.6755407653910149, "grad_norm": 13.572768211364746, "learning_rate": 5e-06, "loss": 1.0858, "num_input_tokens_seen": 12708056, "step": 203 }, { "epoch": 0.6755407653910149, "loss": 0.7612197995185852, "loss_ce": 0.04417877644300461, "loss_iou": 0.10546875, "loss_num": 0.10107421875, "loss_xval": 0.71875, "num_input_tokens_seen": 12708056, "step": 203 }, { "epoch": 0.6788685524126455, "grad_norm": 15.890493392944336, "learning_rate": 5e-06, "loss": 1.139, "num_input_tokens_seen": 12770736, "step": 204 }, { "epoch": 0.6788685524126455, "loss": 1.2692952156066895, "loss_ce": 0.0007404821808449924, "loss_iou": 0.40625, "loss_num": 0.0908203125, "loss_xval": 1.265625, "num_input_tokens_seen": 12770736, "step": 204 }, { "epoch": 0.6821963394342762, "grad_norm": 25.105133056640625, "learning_rate": 5e-06, "loss": 1.4867, "num_input_tokens_seen": 12834956, "step": 205 }, { "epoch": 0.6821963394342762, "loss": 1.2058188915252686, "loss_ce": 0.002937908982858062, "loss_iou": 0.421875, "loss_num": 0.072265625, "loss_xval": 1.203125, "num_input_tokens_seen": 12834956, "step": 205 }, { "epoch": 0.6855241264559068, "grad_norm": 57.6992073059082, "learning_rate": 5e-06, "loss": 1.2759, "num_input_tokens_seen": 12898988, "step": 206 }, { "epoch": 0.6855241264559068, "loss": 1.0813333988189697, "loss_ce": 0.03372599184513092, "loss_iou": 0.283203125, "loss_num": 0.09619140625, "loss_xval": 1.046875, "num_input_tokens_seen": 12898988, "step": 206 }, { "epoch": 0.6888519134775375, "grad_norm": 17.233978271484375, "learning_rate": 5e-06, "loss": 1.1929, "num_input_tokens_seen": 12962164, "step": 207 }, { "epoch": 0.6888519134775375, "loss": 1.2135835886001587, "loss_ce": 0.01924763433635235, "loss_iou": 0.439453125, "loss_num": 0.0625, "loss_xval": 1.1953125, "num_input_tokens_seen": 12962164, "step": 207 }, { "epoch": 0.6921797004991681, "grad_norm": 16.188570022583008, "learning_rate": 5e-06, "loss": 1.3785, "num_input_tokens_seen": 13024784, "step": 208 }, { "epoch": 0.6921797004991681, "loss": 1.215705156326294, "loss_ce": 0.005255852825939655, "loss_iou": 0.373046875, "loss_num": 0.09326171875, "loss_xval": 1.2109375, "num_input_tokens_seen": 13024784, "step": 208 }, { "epoch": 0.6955074875207987, "grad_norm": 23.61080551147461, "learning_rate": 5e-06, "loss": 1.3336, "num_input_tokens_seen": 13087760, "step": 209 }, { "epoch": 0.6955074875207987, "loss": 1.5246670246124268, "loss_ce": 0.002206052653491497, "loss_iou": 0.484375, "loss_num": 0.1103515625, "loss_xval": 1.5234375, "num_input_tokens_seen": 13087760, "step": 209 }, { "epoch": 0.6988352745424293, "grad_norm": 13.288503646850586, "learning_rate": 5e-06, "loss": 1.1365, "num_input_tokens_seen": 13150884, "step": 210 }, { "epoch": 0.6988352745424293, "loss": 1.2659443616867065, "loss_ce": 0.018141645938158035, "loss_iou": 0.40234375, "loss_num": 0.0888671875, "loss_xval": 1.25, "num_input_tokens_seen": 13150884, "step": 210 }, { "epoch": 0.7021630615640599, "grad_norm": 30.597015380859375, "learning_rate": 5e-06, "loss": 1.4653, "num_input_tokens_seen": 13214276, "step": 211 }, { "epoch": 0.7021630615640599, "loss": 1.5506691932678223, "loss_ce": 0.01111849956214428, "loss_iou": 0.470703125, "loss_num": 0.1201171875, "loss_xval": 1.5390625, "num_input_tokens_seen": 13214276, "step": 211 }, { "epoch": 0.7054908485856906, "grad_norm": 23.87419319152832, "learning_rate": 5e-06, "loss": 1.1347, "num_input_tokens_seen": 13276424, "step": 212 }, { "epoch": 0.7054908485856906, "loss": 1.1488089561462402, "loss_ce": 0.0018362791743129492, "loss_iou": 0.36328125, "loss_num": 0.08447265625, "loss_xval": 1.1484375, "num_input_tokens_seen": 13276424, "step": 212 }, { "epoch": 0.7088186356073212, "grad_norm": 16.2000732421875, "learning_rate": 5e-06, "loss": 1.2645, "num_input_tokens_seen": 13339488, "step": 213 }, { "epoch": 0.7088186356073212, "loss": 1.0974924564361572, "loss_ce": 0.0013010293478146195, "loss_iou": 0.3359375, "loss_num": 0.0849609375, "loss_xval": 1.09375, "num_input_tokens_seen": 13339488, "step": 213 }, { "epoch": 0.7121464226289518, "grad_norm": 34.518123626708984, "learning_rate": 5e-06, "loss": 1.0251, "num_input_tokens_seen": 13402452, "step": 214 }, { "epoch": 0.7121464226289518, "loss": 1.091152548789978, "loss_ce": 0.0013087954139336944, "loss_iou": 0.3359375, "loss_num": 0.083984375, "loss_xval": 1.09375, "num_input_tokens_seen": 13402452, "step": 214 }, { "epoch": 0.7154742096505824, "grad_norm": 13.655023574829102, "learning_rate": 5e-06, "loss": 1.157, "num_input_tokens_seen": 13465432, "step": 215 }, { "epoch": 0.7154742096505824, "loss": 1.149668574333191, "loss_ce": 0.00928770937025547, "loss_iou": 0.33984375, "loss_num": 0.09228515625, "loss_xval": 1.140625, "num_input_tokens_seen": 13465432, "step": 215 }, { "epoch": 0.718801996672213, "grad_norm": 25.365924835205078, "learning_rate": 5e-06, "loss": 1.125, "num_input_tokens_seen": 13528332, "step": 216 }, { "epoch": 0.718801996672213, "loss": 1.1836438179016113, "loss_ce": 0.13774539530277252, "loss_iou": 0.294921875, "loss_num": 0.0908203125, "loss_xval": 1.046875, "num_input_tokens_seen": 13528332, "step": 216 }, { "epoch": 0.7221297836938436, "grad_norm": 27.475000381469727, "learning_rate": 5e-06, "loss": 1.4337, "num_input_tokens_seen": 13590456, "step": 217 }, { "epoch": 0.7221297836938436, "loss": 1.3032087087631226, "loss_ce": 0.01927315816283226, "loss_iou": 0.40625, "loss_num": 0.09375, "loss_xval": 1.28125, "num_input_tokens_seen": 13590456, "step": 217 }, { "epoch": 0.7254575707154742, "grad_norm": 26.22903823852539, "learning_rate": 5e-06, "loss": 1.1997, "num_input_tokens_seen": 13653520, "step": 218 }, { "epoch": 0.7254575707154742, "loss": 1.0637105703353882, "loss_ce": 0.0012105784844607115, "loss_iou": 0.30859375, "loss_num": 0.0888671875, "loss_xval": 1.0625, "num_input_tokens_seen": 13653520, "step": 218 }, { "epoch": 0.7287853577371048, "grad_norm": 21.157546997070312, "learning_rate": 5e-06, "loss": 1.2113, "num_input_tokens_seen": 13717416, "step": 219 }, { "epoch": 0.7287853577371048, "loss": 1.2450811862945557, "loss_ce": 0.006799900438636541, "loss_iou": 0.408203125, "loss_num": 0.08447265625, "loss_xval": 1.234375, "num_input_tokens_seen": 13717416, "step": 219 }, { "epoch": 0.7321131447587355, "grad_norm": 16.073261260986328, "learning_rate": 5e-06, "loss": 1.3171, "num_input_tokens_seen": 13780356, "step": 220 }, { "epoch": 0.7321131447587355, "loss": 1.1784981489181519, "loss_ce": 0.005646565929055214, "loss_iou": 0.400390625, "loss_num": 0.07470703125, "loss_xval": 1.171875, "num_input_tokens_seen": 13780356, "step": 220 }, { "epoch": 0.7354409317803661, "grad_norm": 9.423686027526855, "learning_rate": 5e-06, "loss": 1.4231, "num_input_tokens_seen": 13843264, "step": 221 }, { "epoch": 0.7354409317803661, "loss": 1.6099144220352173, "loss_ce": 0.0015159165486693382, "loss_iou": 0.484375, "loss_num": 0.1279296875, "loss_xval": 1.609375, "num_input_tokens_seen": 13843264, "step": 221 }, { "epoch": 0.7387687188019967, "grad_norm": 13.743108749389648, "learning_rate": 5e-06, "loss": 1.1927, "num_input_tokens_seen": 13905668, "step": 222 }, { "epoch": 0.7387687188019967, "loss": 1.1869020462036133, "loss_ce": 0.001599297858774662, "loss_iou": 0.40234375, "loss_num": 0.07666015625, "loss_xval": 1.1875, "num_input_tokens_seen": 13905668, "step": 222 }, { "epoch": 0.7420965058236273, "grad_norm": 31.587129592895508, "learning_rate": 5e-06, "loss": 1.3488, "num_input_tokens_seen": 13968100, "step": 223 }, { "epoch": 0.7420965058236273, "loss": 1.445157527923584, "loss_ce": 0.01302863098680973, "loss_iou": 0.45703125, "loss_num": 0.10400390625, "loss_xval": 1.4296875, "num_input_tokens_seen": 13968100, "step": 223 }, { "epoch": 0.7454242928452579, "grad_norm": 11.37446403503418, "learning_rate": 5e-06, "loss": 1.1052, "num_input_tokens_seen": 14029952, "step": 224 }, { "epoch": 0.7454242928452579, "loss": 1.2456074953079224, "loss_ce": 0.0007344639161601663, "loss_iou": 0.416015625, "loss_num": 0.08251953125, "loss_xval": 1.2421875, "num_input_tokens_seen": 14029952, "step": 224 }, { "epoch": 0.7487520798668885, "grad_norm": 16.84494972229004, "learning_rate": 5e-06, "loss": 1.301, "num_input_tokens_seen": 14093280, "step": 225 }, { "epoch": 0.7487520798668885, "loss": 1.3011748790740967, "loss_ce": 0.015042103826999664, "loss_iou": 0.330078125, "loss_num": 0.12451171875, "loss_xval": 1.2890625, "num_input_tokens_seen": 14093280, "step": 225 }, { "epoch": 0.7520798668885191, "grad_norm": 9.384833335876465, "learning_rate": 5e-06, "loss": 1.2195, "num_input_tokens_seen": 14157284, "step": 226 }, { "epoch": 0.7520798668885191, "loss": 1.217372179031372, "loss_ce": 0.0010636431397870183, "loss_iou": 0.33203125, "loss_num": 0.11083984375, "loss_xval": 1.21875, "num_input_tokens_seen": 14157284, "step": 226 }, { "epoch": 0.7554076539101497, "grad_norm": 15.191215515136719, "learning_rate": 5e-06, "loss": 1.1685, "num_input_tokens_seen": 14219260, "step": 227 }, { "epoch": 0.7554076539101497, "loss": 1.1222490072250366, "loss_ce": 0.015803655609488487, "loss_iou": 0.369140625, "loss_num": 0.07373046875, "loss_xval": 1.109375, "num_input_tokens_seen": 14219260, "step": 227 }, { "epoch": 0.7587354409317804, "grad_norm": 24.131282806396484, "learning_rate": 5e-06, "loss": 1.1008, "num_input_tokens_seen": 14281308, "step": 228 }, { "epoch": 0.7587354409317804, "loss": 1.2230792045593262, "loss_ce": 0.0006670955335721374, "loss_iou": 0.36328125, "loss_num": 0.09912109375, "loss_xval": 1.21875, "num_input_tokens_seen": 14281308, "step": 228 }, { "epoch": 0.762063227953411, "grad_norm": 14.669888496398926, "learning_rate": 5e-06, "loss": 1.1266, "num_input_tokens_seen": 14343964, "step": 229 }, { "epoch": 0.762063227953411, "loss": 1.3784270286560059, "loss_ce": 0.008309826254844666, "loss_iou": 0.462890625, "loss_num": 0.08935546875, "loss_xval": 1.3671875, "num_input_tokens_seen": 14343964, "step": 229 }, { "epoch": 0.7653910149750416, "grad_norm": 18.83928871154785, "learning_rate": 5e-06, "loss": 1.3428, "num_input_tokens_seen": 14407128, "step": 230 }, { "epoch": 0.7653910149750416, "loss": 1.1617732048034668, "loss_ce": 0.0025936225429177284, "loss_iou": 0.361328125, "loss_num": 0.0869140625, "loss_xval": 1.15625, "num_input_tokens_seen": 14407128, "step": 230 }, { "epoch": 0.7687188019966722, "grad_norm": 14.189291000366211, "learning_rate": 5e-06, "loss": 0.9483, "num_input_tokens_seen": 14470136, "step": 231 }, { "epoch": 0.7687188019966722, "loss": 0.9246655702590942, "loss_ce": 0.01060311309993267, "loss_iou": 0.298828125, "loss_num": 0.0634765625, "loss_xval": 0.9140625, "num_input_tokens_seen": 14470136, "step": 231 }, { "epoch": 0.7720465890183028, "grad_norm": 14.045916557312012, "learning_rate": 5e-06, "loss": 0.8905, "num_input_tokens_seen": 14532512, "step": 232 }, { "epoch": 0.7720465890183028, "loss": 0.65977543592453, "loss_ce": 0.014023483730852604, "loss_iou": 0.166015625, "loss_num": 0.06298828125, "loss_xval": 0.64453125, "num_input_tokens_seen": 14532512, "step": 232 }, { "epoch": 0.7753743760399334, "grad_norm": 20.161911010742188, "learning_rate": 5e-06, "loss": 1.3524, "num_input_tokens_seen": 14594204, "step": 233 }, { "epoch": 0.7753743760399334, "loss": 1.3588159084320068, "loss_ce": 0.006765137426555157, "loss_iou": 0.46484375, "loss_num": 0.083984375, "loss_xval": 1.3515625, "num_input_tokens_seen": 14594204, "step": 233 }, { "epoch": 0.778702163061564, "grad_norm": 12.568875312805176, "learning_rate": 5e-06, "loss": 1.1444, "num_input_tokens_seen": 14656860, "step": 234 }, { "epoch": 0.778702163061564, "loss": 0.9393011331558228, "loss_ce": 0.014252342283725739, "loss_iou": 0.296875, "loss_num": 0.06640625, "loss_xval": 0.92578125, "num_input_tokens_seen": 14656860, "step": 234 }, { "epoch": 0.7820299500831946, "grad_norm": 21.507532119750977, "learning_rate": 5e-06, "loss": 1.3511, "num_input_tokens_seen": 14720468, "step": 235 }, { "epoch": 0.7820299500831946, "loss": 1.420245885848999, "loss_ce": 0.011066140606999397, "loss_iou": 0.482421875, "loss_num": 0.0888671875, "loss_xval": 1.40625, "num_input_tokens_seen": 14720468, "step": 235 }, { "epoch": 0.7853577371048253, "grad_norm": 33.57356643676758, "learning_rate": 5e-06, "loss": 1.4398, "num_input_tokens_seen": 14784184, "step": 236 }, { "epoch": 0.7853577371048253, "loss": 1.4989573955535889, "loss_ce": 0.0023754474241286516, "loss_iou": 0.458984375, "loss_num": 0.115234375, "loss_xval": 1.5, "num_input_tokens_seen": 14784184, "step": 236 }, { "epoch": 0.7886855241264559, "grad_norm": 13.441980361938477, "learning_rate": 5e-06, "loss": 0.9306, "num_input_tokens_seen": 14846184, "step": 237 }, { "epoch": 0.7886855241264559, "loss": 0.9496654272079468, "loss_ce": 0.007770880591124296, "loss_iou": 0.259765625, "loss_num": 0.083984375, "loss_xval": 0.94140625, "num_input_tokens_seen": 14846184, "step": 237 }, { "epoch": 0.7920133111480865, "grad_norm": 23.40178871154785, "learning_rate": 5e-06, "loss": 1.1604, "num_input_tokens_seen": 14908792, "step": 238 }, { "epoch": 0.7920133111480865, "loss": 1.1228361129760742, "loss_ce": 0.0007658317917957902, "loss_iou": 0.31640625, "loss_num": 0.09814453125, "loss_xval": 1.125, "num_input_tokens_seen": 14908792, "step": 238 }, { "epoch": 0.7953410981697171, "grad_norm": 29.678503036499023, "learning_rate": 5e-06, "loss": 1.4377, "num_input_tokens_seen": 14971520, "step": 239 }, { "epoch": 0.7953410981697171, "loss": 1.4047870635986328, "loss_ce": 0.006837797816842794, "loss_iou": 0.50390625, "loss_num": 0.07763671875, "loss_xval": 1.3984375, "num_input_tokens_seen": 14971520, "step": 239 }, { "epoch": 0.7986688851913477, "grad_norm": 20.842504501342773, "learning_rate": 5e-06, "loss": 1.1844, "num_input_tokens_seen": 15033176, "step": 240 }, { "epoch": 0.7986688851913477, "loss": 1.3855491876602173, "loss_ce": 0.02422105148434639, "loss_iou": 0.416015625, "loss_num": 0.10498046875, "loss_xval": 1.359375, "num_input_tokens_seen": 15033176, "step": 240 }, { "epoch": 0.8019966722129783, "grad_norm": 9.68075942993164, "learning_rate": 5e-06, "loss": 1.3038, "num_input_tokens_seen": 15094324, "step": 241 }, { "epoch": 0.8019966722129783, "loss": 1.2901484966278076, "loss_ce": 0.0098751625046134, "loss_iou": 0.359375, "loss_num": 0.1123046875, "loss_xval": 1.28125, "num_input_tokens_seen": 15094324, "step": 241 }, { "epoch": 0.8053244592346089, "grad_norm": 14.69822883605957, "learning_rate": 5e-06, "loss": 1.2158, "num_input_tokens_seen": 15156516, "step": 242 }, { "epoch": 0.8053244592346089, "loss": 1.2247101068496704, "loss_ce": 0.02549135871231556, "loss_iou": 0.33203125, "loss_num": 0.1064453125, "loss_xval": 1.203125, "num_input_tokens_seen": 15156516, "step": 242 }, { "epoch": 0.8086522462562395, "grad_norm": 16.155757904052734, "learning_rate": 5e-06, "loss": 1.2242, "num_input_tokens_seen": 15218404, "step": 243 }, { "epoch": 0.8086522462562395, "loss": 1.1564106941223145, "loss_ce": 0.015785671770572662, "loss_iou": 0.375, "loss_num": 0.07763671875, "loss_xval": 1.140625, "num_input_tokens_seen": 15218404, "step": 243 }, { "epoch": 0.8119800332778702, "grad_norm": 13.781820297241211, "learning_rate": 5e-06, "loss": 1.1725, "num_input_tokens_seen": 15282228, "step": 244 }, { "epoch": 0.8119800332778702, "loss": 1.1227807998657227, "loss_ce": 0.017311980947852135, "loss_iou": 0.384765625, "loss_num": 0.0673828125, "loss_xval": 1.109375, "num_input_tokens_seen": 15282228, "step": 244 }, { "epoch": 0.8153078202995009, "grad_norm": 15.639060020446777, "learning_rate": 5e-06, "loss": 1.1997, "num_input_tokens_seen": 15345232, "step": 245 }, { "epoch": 0.8153078202995009, "loss": 1.175334095954895, "loss_ce": 0.0005293773720040917, "loss_iou": 0.408203125, "loss_num": 0.07177734375, "loss_xval": 1.171875, "num_input_tokens_seen": 15345232, "step": 245 }, { "epoch": 0.8186356073211315, "grad_norm": 20.45536994934082, "learning_rate": 5e-06, "loss": 1.2539, "num_input_tokens_seen": 15408764, "step": 246 }, { "epoch": 0.8186356073211315, "loss": 1.2810962200164795, "loss_ce": 0.00033442690619267523, "loss_iou": 0.44921875, "loss_num": 0.076171875, "loss_xval": 1.28125, "num_input_tokens_seen": 15408764, "step": 246 }, { "epoch": 0.8219633943427621, "grad_norm": 12.981587409973145, "learning_rate": 5e-06, "loss": 1.0421, "num_input_tokens_seen": 15471008, "step": 247 }, { "epoch": 0.8219633943427621, "loss": 1.138602375984192, "loss_ce": 0.00383676472119987, "loss_iou": 0.38671875, "loss_num": 0.072265625, "loss_xval": 1.1328125, "num_input_tokens_seen": 15471008, "step": 247 }, { "epoch": 0.8252911813643927, "grad_norm": 51.6541633605957, "learning_rate": 5e-06, "loss": 0.8726, "num_input_tokens_seen": 15533376, "step": 248 }, { "epoch": 0.8252911813643927, "loss": 0.745193362236023, "loss_ce": 0.01130670215934515, "loss_iou": 0.1748046875, "loss_num": 0.07666015625, "loss_xval": 0.734375, "num_input_tokens_seen": 15533376, "step": 248 }, { "epoch": 0.8286189683860233, "grad_norm": 20.74786949157715, "learning_rate": 5e-06, "loss": 1.274, "num_input_tokens_seen": 15596260, "step": 249 }, { "epoch": 0.8286189683860233, "loss": 0.9992030262947083, "loss_ce": 0.005550671834498644, "loss_iou": 0.2236328125, "loss_num": 0.109375, "loss_xval": 0.9921875, "num_input_tokens_seen": 15596260, "step": 249 }, { "epoch": 0.831946755407654, "grad_norm": 60.06270980834961, "learning_rate": 5e-06, "loss": 1.4534, "num_input_tokens_seen": 15658848, "step": 250 }, { "epoch": 0.831946755407654, "eval_seeclick_CIoU": 0.12590423226356506, "eval_seeclick_GIoU": 0.17364877462387085, "eval_seeclick_IoU": 0.24328559637069702, "eval_seeclick_MAE_all": 0.18153271079063416, "eval_seeclick_MAE_h": 0.07490194030106068, "eval_seeclick_MAE_w": 0.10936548560857773, "eval_seeclick_MAE_x_boxes": 0.31923606991767883, "eval_seeclick_MAE_y_boxes": 0.09831257537007332, "eval_seeclick_NUM_probability": 0.9999359548091888, "eval_seeclick_inside_bbox": 0.30520834028720856, "eval_seeclick_loss": 2.6519570350646973, "eval_seeclick_loss_ce": 0.03889298997819424, "eval_seeclick_loss_iou": 0.8387451171875, "eval_seeclick_loss_num": 0.1761322021484375, "eval_seeclick_loss_xval": 2.558349609375, "eval_seeclick_runtime": 70.0545, "eval_seeclick_samples_per_second": 0.671, "eval_seeclick_steps_per_second": 0.029, "num_input_tokens_seen": 15658848, "step": 250 }, { "epoch": 0.831946755407654, "eval_icons_CIoU": 0.07106863334774971, "eval_icons_GIoU": 0.19157247245311737, "eval_icons_IoU": 0.21291274577379227, "eval_icons_MAE_all": 0.1373068317770958, "eval_icons_MAE_h": 0.07063675299286842, "eval_icons_MAE_w": 0.13146401941776276, "eval_icons_MAE_x_boxes": 0.1353582739830017, "eval_icons_MAE_y_boxes": 0.05130962934345007, "eval_icons_NUM_probability": 0.9999799430370331, "eval_icons_inside_bbox": 0.2986111119389534, "eval_icons_loss": 2.3059701919555664, "eval_icons_loss_ce": 2.8694971661025193e-05, "eval_icons_loss_iou": 0.806396484375, "eval_icons_loss_num": 0.14170265197753906, "eval_icons_loss_xval": 2.322265625, "eval_icons_runtime": 66.8297, "eval_icons_samples_per_second": 0.748, "eval_icons_steps_per_second": 0.03, "num_input_tokens_seen": 15658848, "step": 250 }, { "epoch": 0.831946755407654, "eval_screenspot_CIoU": 0.023540629694859188, "eval_screenspot_GIoU": 0.07850407063961029, "eval_screenspot_IoU": 0.1775170018275579, "eval_screenspot_MAE_all": 0.22443277140458426, "eval_screenspot_MAE_h": 0.09910715123017629, "eval_screenspot_MAE_w": 0.17734555900096893, "eval_screenspot_MAE_x_boxes": 0.31622066100438434, "eval_screenspot_MAE_y_boxes": 0.13959191491206488, "eval_screenspot_NUM_probability": 0.9998429814974467, "eval_screenspot_inside_bbox": 0.2912500003973643, "eval_screenspot_loss": 2.9710144996643066, "eval_screenspot_loss_ce": 0.0023049935698509216, "eval_screenspot_loss_iou": 0.92724609375, "eval_screenspot_loss_num": 0.23094685872395834, "eval_screenspot_loss_xval": 3.009765625, "eval_screenspot_runtime": 119.5082, "eval_screenspot_samples_per_second": 0.745, "eval_screenspot_steps_per_second": 0.025, "num_input_tokens_seen": 15658848, "step": 250 }, { "epoch": 0.831946755407654, "eval_compot_CIoU": -0.05822424963116646, "eval_compot_GIoU": 0.039091480895876884, "eval_compot_IoU": 0.10423949733376503, "eval_compot_MAE_all": 0.23665092885494232, "eval_compot_MAE_h": 0.08639108017086983, "eval_compot_MAE_w": 0.25060585141181946, "eval_compot_MAE_x_boxes": 0.2275974601507187, "eval_compot_MAE_y_boxes": 0.14669033139944077, "eval_compot_NUM_probability": 0.9997861981391907, "eval_compot_inside_bbox": 0.2204861119389534, "eval_compot_loss": 3.053682804107666, "eval_compot_loss_ce": 0.0055244737304747105, "eval_compot_loss_iou": 0.95751953125, "eval_compot_loss_num": 0.2315673828125, "eval_compot_loss_xval": 3.07421875, "eval_compot_runtime": 67.9773, "eval_compot_samples_per_second": 0.736, "eval_compot_steps_per_second": 0.029, "num_input_tokens_seen": 15658848, "step": 250 }, { "epoch": 0.831946755407654, "eval_custom_ui_MAE_all": 0.11324165761470795, "eval_custom_ui_MAE_x": 0.11285695433616638, "eval_custom_ui_MAE_y": 0.11362636089324951, "eval_custom_ui_NUM_probability": 0.9999939203262329, "eval_custom_ui_loss": 0.5810784697532654, "eval_custom_ui_loss_ce": 0.01076856441795826, "eval_custom_ui_loss_num": 0.1113433837890625, "eval_custom_ui_loss_xval": 0.556640625, "eval_custom_ui_runtime": 57.5498, "eval_custom_ui_samples_per_second": 0.869, "eval_custom_ui_steps_per_second": 0.035, "num_input_tokens_seen": 15658848, "step": 250 }, { "epoch": 0.831946755407654, "loss": 0.5382449626922607, "loss_ce": 0.010657099075615406, "loss_iou": 0.0, "loss_num": 0.10546875, "loss_xval": 0.52734375, "num_input_tokens_seen": 15658848, "step": 250 }, { "epoch": 0.8352745424292846, "grad_norm": 42.612083435058594, "learning_rate": 5e-06, "loss": 1.1362, "num_input_tokens_seen": 15721148, "step": 251 }, { "epoch": 0.8352745424292846, "loss": 1.1814727783203125, "loss_ce": 0.0027619153261184692, "loss_iou": 0.353515625, "loss_num": 0.09423828125, "loss_xval": 1.1796875, "num_input_tokens_seen": 15721148, "step": 251 }, { "epoch": 0.8386023294509152, "grad_norm": 13.122926712036133, "learning_rate": 5e-06, "loss": 1.0092, "num_input_tokens_seen": 15783360, "step": 252 }, { "epoch": 0.8386023294509152, "loss": 1.0700435638427734, "loss_ce": 0.0011958942050114274, "loss_iou": 0.33203125, "loss_num": 0.08056640625, "loss_xval": 1.0703125, "num_input_tokens_seen": 15783360, "step": 252 }, { "epoch": 0.8419301164725458, "grad_norm": 21.410629272460938, "learning_rate": 5e-06, "loss": 1.1255, "num_input_tokens_seen": 15845752, "step": 253 }, { "epoch": 0.8419301164725458, "loss": 1.3312301635742188, "loss_ce": 0.004081668332219124, "loss_iou": 0.416015625, "loss_num": 0.09814453125, "loss_xval": 1.328125, "num_input_tokens_seen": 15845752, "step": 253 }, { "epoch": 0.8452579034941764, "grad_norm": 15.538365364074707, "learning_rate": 5e-06, "loss": 1.2166, "num_input_tokens_seen": 15908960, "step": 254 }, { "epoch": 0.8452579034941764, "loss": 1.152634859085083, "loss_ce": 0.010545015335083008, "loss_iou": 0.375, "loss_num": 0.07861328125, "loss_xval": 1.140625, "num_input_tokens_seen": 15908960, "step": 254 }, { "epoch": 0.848585690515807, "grad_norm": 40.248104095458984, "learning_rate": 5e-06, "loss": 0.985, "num_input_tokens_seen": 15968648, "step": 255 }, { "epoch": 0.848585690515807, "loss": 0.7882208824157715, "loss_ce": 0.02186344563961029, "loss_iou": 0.1806640625, "loss_num": 0.0810546875, "loss_xval": 0.765625, "num_input_tokens_seen": 15968648, "step": 255 }, { "epoch": 0.8519134775374376, "grad_norm": 28.604537963867188, "learning_rate": 5e-06, "loss": 1.1575, "num_input_tokens_seen": 16030956, "step": 256 }, { "epoch": 0.8519134775374376, "loss": 1.093517780303955, "loss_ce": 0.017590083181858063, "loss_iou": 0.287109375, "loss_num": 0.1005859375, "loss_xval": 1.078125, "num_input_tokens_seen": 16030956, "step": 256 }, { "epoch": 0.8552412645590682, "grad_norm": 19.028539657592773, "learning_rate": 5e-06, "loss": 1.1595, "num_input_tokens_seen": 16092152, "step": 257 }, { "epoch": 0.8552412645590682, "loss": 1.3261454105377197, "loss_ce": 0.008274221792817116, "loss_iou": 0.37890625, "loss_num": 0.11181640625, "loss_xval": 1.3203125, "num_input_tokens_seen": 16092152, "step": 257 }, { "epoch": 0.8585690515806988, "grad_norm": 11.434488296508789, "learning_rate": 5e-06, "loss": 1.0794, "num_input_tokens_seen": 16155380, "step": 258 }, { "epoch": 0.8585690515806988, "loss": 1.0630097389221191, "loss_ce": 0.01808791421353817, "loss_iou": 0.34765625, "loss_num": 0.06982421875, "loss_xval": 1.046875, "num_input_tokens_seen": 16155380, "step": 258 }, { "epoch": 0.8618968386023295, "grad_norm": 8.151037216186523, "learning_rate": 5e-06, "loss": 0.9681, "num_input_tokens_seen": 16216940, "step": 259 }, { "epoch": 0.8618968386023295, "loss": 0.9589591026306152, "loss_ce": 0.0010733783710747957, "loss_iou": 0.2578125, "loss_num": 0.08837890625, "loss_xval": 0.95703125, "num_input_tokens_seen": 16216940, "step": 259 }, { "epoch": 0.8652246256239601, "grad_norm": 19.071046829223633, "learning_rate": 5e-06, "loss": 1.4472, "num_input_tokens_seen": 16280368, "step": 260 }, { "epoch": 0.8652246256239601, "loss": 1.5674210786819458, "loss_ce": 0.09525313228368759, "loss_iou": 0.466796875, "loss_num": 0.107421875, "loss_xval": 1.46875, "num_input_tokens_seen": 16280368, "step": 260 }, { "epoch": 0.8685524126455907, "grad_norm": 19.866905212402344, "learning_rate": 5e-06, "loss": 0.9902, "num_input_tokens_seen": 16343184, "step": 261 }, { "epoch": 0.8685524126455907, "loss": 0.8425556421279907, "loss_ce": 0.005885718856006861, "loss_iou": 0.2353515625, "loss_num": 0.0732421875, "loss_xval": 0.8359375, "num_input_tokens_seen": 16343184, "step": 261 }, { "epoch": 0.8718801996672213, "grad_norm": 26.4387264251709, "learning_rate": 5e-06, "loss": 1.1488, "num_input_tokens_seen": 16405096, "step": 262 }, { "epoch": 0.8718801996672213, "loss": 1.1173707246780396, "loss_ce": 0.0014038896188139915, "loss_iou": 0.33203125, "loss_num": 0.09033203125, "loss_xval": 1.1171875, "num_input_tokens_seen": 16405096, "step": 262 }, { "epoch": 0.8752079866888519, "grad_norm": 44.49690246582031, "learning_rate": 5e-06, "loss": 1.1406, "num_input_tokens_seen": 16468060, "step": 263 }, { "epoch": 0.8752079866888519, "loss": 1.2327797412872314, "loss_ce": 0.004508184269070625, "loss_iou": 0.388671875, "loss_num": 0.0908203125, "loss_xval": 1.2265625, "num_input_tokens_seen": 16468060, "step": 263 }, { "epoch": 0.8785357737104825, "grad_norm": 26.186443328857422, "learning_rate": 5e-06, "loss": 1.6099, "num_input_tokens_seen": 16532308, "step": 264 }, { "epoch": 0.8785357737104825, "loss": 1.6475975513458252, "loss_ce": 0.0040428778156638145, "loss_iou": 0.55078125, "loss_num": 0.1083984375, "loss_xval": 1.640625, "num_input_tokens_seen": 16532308, "step": 264 }, { "epoch": 0.8818635607321131, "grad_norm": 16.094806671142578, "learning_rate": 5e-06, "loss": 1.2654, "num_input_tokens_seen": 16595464, "step": 265 }, { "epoch": 0.8818635607321131, "loss": 1.439080834388733, "loss_ce": 0.015496812760829926, "loss_iou": 0.431640625, "loss_num": 0.1123046875, "loss_xval": 1.421875, "num_input_tokens_seen": 16595464, "step": 265 }, { "epoch": 0.8851913477537438, "grad_norm": 13.568002700805664, "learning_rate": 5e-06, "loss": 1.3184, "num_input_tokens_seen": 16659512, "step": 266 }, { "epoch": 0.8851913477537438, "loss": 1.2902538776397705, "loss_ce": 0.0011914295610040426, "loss_iou": 0.392578125, "loss_num": 0.10107421875, "loss_xval": 1.2890625, "num_input_tokens_seen": 16659512, "step": 266 }, { "epoch": 0.8885191347753744, "grad_norm": 17.312501907348633, "learning_rate": 5e-06, "loss": 1.2592, "num_input_tokens_seen": 16723264, "step": 267 }, { "epoch": 0.8885191347753744, "loss": 1.0298858880996704, "loss_ce": 0.006448333151638508, "loss_iou": 0.30859375, "loss_num": 0.0810546875, "loss_xval": 1.0234375, "num_input_tokens_seen": 16723264, "step": 267 }, { "epoch": 0.891846921797005, "grad_norm": 11.952759742736816, "learning_rate": 5e-06, "loss": 1.1869, "num_input_tokens_seen": 16786628, "step": 268 }, { "epoch": 0.891846921797005, "loss": 1.2707010507583618, "loss_ce": 0.002634685719385743, "loss_iou": 0.41015625, "loss_num": 0.08984375, "loss_xval": 1.265625, "num_input_tokens_seen": 16786628, "step": 268 }, { "epoch": 0.8951747088186356, "grad_norm": 81.69204711914062, "learning_rate": 5e-06, "loss": 1.0619, "num_input_tokens_seen": 16848308, "step": 269 }, { "epoch": 0.8951747088186356, "loss": 1.1395946741104126, "loss_ce": 0.0033642093185335398, "loss_iou": 0.287109375, "loss_num": 0.1123046875, "loss_xval": 1.1328125, "num_input_tokens_seen": 16848308, "step": 269 }, { "epoch": 0.8985024958402662, "grad_norm": 17.892982482910156, "learning_rate": 5e-06, "loss": 1.0715, "num_input_tokens_seen": 16910492, "step": 270 }, { "epoch": 0.8985024958402662, "loss": 0.9769110679626465, "loss_ce": 0.0023017562925815582, "loss_iou": 0.265625, "loss_num": 0.0888671875, "loss_xval": 0.9765625, "num_input_tokens_seen": 16910492, "step": 270 }, { "epoch": 0.9018302828618968, "grad_norm": 14.101139068603516, "learning_rate": 5e-06, "loss": 1.093, "num_input_tokens_seen": 16973704, "step": 271 }, { "epoch": 0.9018302828618968, "loss": 0.8828328847885132, "loss_ce": 0.00368250603787601, "loss_iou": 0.2421875, "loss_num": 0.0791015625, "loss_xval": 0.87890625, "num_input_tokens_seen": 16973704, "step": 271 }, { "epoch": 0.9051580698835274, "grad_norm": 17.054306030273438, "learning_rate": 5e-06, "loss": 1.1851, "num_input_tokens_seen": 17037116, "step": 272 }, { "epoch": 0.9051580698835274, "loss": 1.2159790992736816, "loss_ce": 0.0040650563314557076, "loss_iou": 0.369140625, "loss_num": 0.0947265625, "loss_xval": 1.2109375, "num_input_tokens_seen": 17037116, "step": 272 }, { "epoch": 0.908485856905158, "grad_norm": 19.807126998901367, "learning_rate": 5e-06, "loss": 1.0618, "num_input_tokens_seen": 17098136, "step": 273 }, { "epoch": 0.908485856905158, "loss": 1.046678066253662, "loss_ce": 0.020799197256565094, "loss_iou": 0.2080078125, "loss_num": 0.1220703125, "loss_xval": 1.0234375, "num_input_tokens_seen": 17098136, "step": 273 }, { "epoch": 0.9118136439267887, "grad_norm": 19.801219940185547, "learning_rate": 5e-06, "loss": 1.4729, "num_input_tokens_seen": 17160376, "step": 274 }, { "epoch": 0.9118136439267887, "loss": 1.3921507596969604, "loss_ce": 0.038146864622831345, "loss_iou": 0.4375, "loss_num": 0.095703125, "loss_xval": 1.3515625, "num_input_tokens_seen": 17160376, "step": 274 }, { "epoch": 0.9151414309484193, "grad_norm": 28.975116729736328, "learning_rate": 5e-06, "loss": 1.058, "num_input_tokens_seen": 17223484, "step": 275 }, { "epoch": 0.9151414309484193, "loss": 1.0364493131637573, "loss_ce": 0.0027578980661928654, "loss_iou": 0.33984375, "loss_num": 0.0703125, "loss_xval": 1.03125, "num_input_tokens_seen": 17223484, "step": 275 }, { "epoch": 0.9184692179700499, "grad_norm": 16.811128616333008, "learning_rate": 5e-06, "loss": 0.8941, "num_input_tokens_seen": 17285864, "step": 276 }, { "epoch": 0.9184692179700499, "loss": 0.9024735689163208, "loss_ce": 0.01306929811835289, "loss_iou": 0.236328125, "loss_num": 0.08349609375, "loss_xval": 0.890625, "num_input_tokens_seen": 17285864, "step": 276 }, { "epoch": 0.9217970049916805, "grad_norm": 16.22093391418457, "learning_rate": 5e-06, "loss": 1.0589, "num_input_tokens_seen": 17349464, "step": 277 }, { "epoch": 0.9217970049916805, "loss": 0.9893227219581604, "loss_ce": 0.0005531868082471192, "loss_iou": 0.2451171875, "loss_num": 0.099609375, "loss_xval": 0.98828125, "num_input_tokens_seen": 17349464, "step": 277 }, { "epoch": 0.9251247920133111, "grad_norm": 12.684296607971191, "learning_rate": 5e-06, "loss": 1.1196, "num_input_tokens_seen": 17412492, "step": 278 }, { "epoch": 0.9251247920133111, "loss": 0.9944457411766052, "loss_ce": 0.0027464870363473892, "loss_iou": 0.26953125, "loss_num": 0.09033203125, "loss_xval": 0.9921875, "num_input_tokens_seen": 17412492, "step": 278 }, { "epoch": 0.9284525790349417, "grad_norm": 54.076019287109375, "learning_rate": 5e-06, "loss": 1.2471, "num_input_tokens_seen": 17475792, "step": 279 }, { "epoch": 0.9284525790349417, "loss": 1.5113483667373657, "loss_ce": 0.0035358648747205734, "loss_iou": 0.453125, "loss_num": 0.12060546875, "loss_xval": 1.5078125, "num_input_tokens_seen": 17475792, "step": 279 }, { "epoch": 0.9317803660565723, "grad_norm": 46.81892013549805, "learning_rate": 5e-06, "loss": 1.3139, "num_input_tokens_seen": 17539360, "step": 280 }, { "epoch": 0.9317803660565723, "loss": 0.9593971967697144, "loss_ce": 0.013108117505908012, "loss_iou": 0.3125, "loss_num": 0.06396484375, "loss_xval": 0.9453125, "num_input_tokens_seen": 17539360, "step": 280 }, { "epoch": 0.9351081530782029, "grad_norm": 26.500545501708984, "learning_rate": 5e-06, "loss": 1.0319, "num_input_tokens_seen": 17602120, "step": 281 }, { "epoch": 0.9351081530782029, "loss": 0.7355974316596985, "loss_ce": 0.0031755524687469006, "loss_iou": 0.2431640625, "loss_num": 0.049072265625, "loss_xval": 0.734375, "num_input_tokens_seen": 17602120, "step": 281 }, { "epoch": 0.9384359400998337, "grad_norm": 26.911073684692383, "learning_rate": 5e-06, "loss": 1.0786, "num_input_tokens_seen": 17665200, "step": 282 }, { "epoch": 0.9384359400998337, "loss": 1.0884435176849365, "loss_ce": 0.0012852513464167714, "loss_iou": 0.275390625, "loss_num": 0.107421875, "loss_xval": 1.0859375, "num_input_tokens_seen": 17665200, "step": 282 }, { "epoch": 0.9417637271214643, "grad_norm": 10.468093872070312, "learning_rate": 5e-06, "loss": 0.7664, "num_input_tokens_seen": 17727580, "step": 283 }, { "epoch": 0.9417637271214643, "loss": 0.8793269991874695, "loss_ce": 0.0031062541529536247, "loss_iou": 0.298828125, "loss_num": 0.055908203125, "loss_xval": 0.875, "num_input_tokens_seen": 17727580, "step": 283 }, { "epoch": 0.9450915141430949, "grad_norm": 9.961268424987793, "learning_rate": 5e-06, "loss": 0.9138, "num_input_tokens_seen": 17789316, "step": 284 }, { "epoch": 0.9450915141430949, "loss": 1.0281856060028076, "loss_ce": 0.002794938860461116, "loss_iou": 0.2177734375, "loss_num": 0.1181640625, "loss_xval": 1.0234375, "num_input_tokens_seen": 17789316, "step": 284 }, { "epoch": 0.9484193011647255, "grad_norm": 12.167577743530273, "learning_rate": 5e-06, "loss": 1.1296, "num_input_tokens_seen": 17852072, "step": 285 }, { "epoch": 0.9484193011647255, "loss": 1.0344905853271484, "loss_ce": 0.005193705670535564, "loss_iou": 0.322265625, "loss_num": 0.0771484375, "loss_xval": 1.03125, "num_input_tokens_seen": 17852072, "step": 285 }, { "epoch": 0.9517470881863561, "grad_norm": 12.375018119812012, "learning_rate": 5e-06, "loss": 1.0284, "num_input_tokens_seen": 17914960, "step": 286 }, { "epoch": 0.9517470881863561, "loss": 0.9589194059371948, "loss_ce": 0.009944751858711243, "loss_iou": 0.3125, "loss_num": 0.06494140625, "loss_xval": 0.94921875, "num_input_tokens_seen": 17914960, "step": 286 }, { "epoch": 0.9550748752079867, "grad_norm": 27.08910369873047, "learning_rate": 5e-06, "loss": 1.1156, "num_input_tokens_seen": 17976848, "step": 287 }, { "epoch": 0.9550748752079867, "loss": 0.7211456298828125, "loss_ce": 0.0006866899784654379, "loss_iou": 0.1591796875, "loss_num": 0.08056640625, "loss_xval": 0.71875, "num_input_tokens_seen": 17976848, "step": 287 }, { "epoch": 0.9584026622296173, "grad_norm": 20.21120262145996, "learning_rate": 5e-06, "loss": 0.9142, "num_input_tokens_seen": 18038416, "step": 288 }, { "epoch": 0.9584026622296173, "loss": 1.0173907279968262, "loss_ce": 0.010310713201761246, "loss_iou": 0.310546875, "loss_num": 0.0771484375, "loss_xval": 1.0078125, "num_input_tokens_seen": 18038416, "step": 288 }, { "epoch": 0.961730449251248, "grad_norm": 25.247774124145508, "learning_rate": 5e-06, "loss": 1.3575, "num_input_tokens_seen": 18101284, "step": 289 }, { "epoch": 0.961730449251248, "loss": 1.379800796508789, "loss_ce": 0.005777302198112011, "loss_iou": 0.408203125, "loss_num": 0.111328125, "loss_xval": 1.375, "num_input_tokens_seen": 18101284, "step": 289 }, { "epoch": 0.9650582362728786, "grad_norm": 18.5815486907959, "learning_rate": 5e-06, "loss": 1.1872, "num_input_tokens_seen": 18163408, "step": 290 }, { "epoch": 0.9650582362728786, "loss": 1.0184900760650635, "loss_ce": 0.000667768414132297, "loss_iou": 0.26953125, "loss_num": 0.09619140625, "loss_xval": 1.015625, "num_input_tokens_seen": 18163408, "step": 290 }, { "epoch": 0.9683860232945092, "grad_norm": 15.90703010559082, "learning_rate": 5e-06, "loss": 1.0381, "num_input_tokens_seen": 18225688, "step": 291 }, { "epoch": 0.9683860232945092, "loss": 0.9319148659706116, "loss_ce": 0.00027425718144513667, "loss_iou": 0.296875, "loss_num": 0.0673828125, "loss_xval": 0.9296875, "num_input_tokens_seen": 18225688, "step": 291 }, { "epoch": 0.9717138103161398, "grad_norm": 16.51078987121582, "learning_rate": 5e-06, "loss": 1.0699, "num_input_tokens_seen": 18288152, "step": 292 }, { "epoch": 0.9717138103161398, "loss": 1.2005267143249512, "loss_ce": 0.0009417659603059292, "loss_iou": 0.41015625, "loss_num": 0.07568359375, "loss_xval": 1.203125, "num_input_tokens_seen": 18288152, "step": 292 }, { "epoch": 0.9750415973377704, "grad_norm": 21.68274688720703, "learning_rate": 5e-06, "loss": 1.1143, "num_input_tokens_seen": 18351572, "step": 293 }, { "epoch": 0.9750415973377704, "loss": 1.3064453601837158, "loss_ce": 0.0044434284791350365, "loss_iou": 0.41796875, "loss_num": 0.0927734375, "loss_xval": 1.3046875, "num_input_tokens_seen": 18351572, "step": 293 }, { "epoch": 0.978369384359401, "grad_norm": 17.310152053833008, "learning_rate": 5e-06, "loss": 1.0826, "num_input_tokens_seen": 18415056, "step": 294 }, { "epoch": 0.978369384359401, "loss": 0.8527212142944336, "loss_ce": 0.004210504237562418, "loss_iou": 0.265625, "loss_num": 0.06396484375, "loss_xval": 0.84765625, "num_input_tokens_seen": 18415056, "step": 294 }, { "epoch": 0.9816971713810316, "grad_norm": 48.573822021484375, "learning_rate": 5e-06, "loss": 1.1994, "num_input_tokens_seen": 18477488, "step": 295 }, { "epoch": 0.9816971713810316, "loss": 1.2597250938415527, "loss_ce": 0.020955566316843033, "loss_iou": 0.400390625, "loss_num": 0.08740234375, "loss_xval": 1.2421875, "num_input_tokens_seen": 18477488, "step": 295 }, { "epoch": 0.9850249584026622, "grad_norm": 25.190378189086914, "learning_rate": 5e-06, "loss": 1.2747, "num_input_tokens_seen": 18541468, "step": 296 }, { "epoch": 0.9850249584026622, "loss": 1.3697137832641602, "loss_ce": 0.002770456252619624, "loss_iou": 0.4140625, "loss_num": 0.107421875, "loss_xval": 1.3671875, "num_input_tokens_seen": 18541468, "step": 296 }, { "epoch": 0.9883527454242929, "grad_norm": 13.586545944213867, "learning_rate": 5e-06, "loss": 1.2876, "num_input_tokens_seen": 18604076, "step": 297 }, { "epoch": 0.9883527454242929, "loss": 1.3679777383804321, "loss_ce": 0.004940649960190058, "loss_iou": 0.435546875, "loss_num": 0.0986328125, "loss_xval": 1.359375, "num_input_tokens_seen": 18604076, "step": 297 }, { "epoch": 0.9916805324459235, "grad_norm": 20.70176887512207, "learning_rate": 5e-06, "loss": 0.8859, "num_input_tokens_seen": 18667228, "step": 298 }, { "epoch": 0.9916805324459235, "loss": 0.9582436084747314, "loss_ce": 0.0007241345010697842, "loss_iou": 0.279296875, "loss_num": 0.07958984375, "loss_xval": 0.95703125, "num_input_tokens_seen": 18667228, "step": 298 }, { "epoch": 0.9950083194675541, "grad_norm": 11.641081809997559, "learning_rate": 5e-06, "loss": 1.1695, "num_input_tokens_seen": 18730164, "step": 299 }, { "epoch": 0.9950083194675541, "loss": 1.2555705308914185, "loss_ce": 0.029008055105805397, "loss_iou": 0.3671875, "loss_num": 0.09814453125, "loss_xval": 1.2265625, "num_input_tokens_seen": 18730164, "step": 299 }, { "epoch": 0.9983361064891847, "grad_norm": 8.146730422973633, "learning_rate": 5e-06, "loss": 1.0957, "num_input_tokens_seen": 18795388, "step": 300 }, { "epoch": 0.9983361064891847, "loss": 1.0049164295196533, "loss_ce": 0.002475087298080325, "loss_iou": 0.337890625, "loss_num": 0.06494140625, "loss_xval": 1.0, "num_input_tokens_seen": 18795388, "step": 300 }, { "epoch": 0.9983361064891847, "loss": 1.238393783569336, "loss_ce": 0.0025539840571582317, "loss_iou": 0.384765625, "loss_num": 0.09375, "loss_xval": 1.234375, "num_input_tokens_seen": 18827556, "step": 300 }, { "epoch": 1.0016638935108153, "grad_norm": 13.772902488708496, "learning_rate": 5e-06, "loss": 1.0861, "num_input_tokens_seen": 18858660, "step": 301 }, { "epoch": 1.0016638935108153, "loss": 0.933795690536499, "loss_ce": 0.000690196524374187, "loss_iou": 0.26171875, "loss_num": 0.08154296875, "loss_xval": 0.93359375, "num_input_tokens_seen": 18858660, "step": 301 }, { "epoch": 1.004991680532446, "grad_norm": 92.42483520507812, "learning_rate": 5e-06, "loss": 1.1483, "num_input_tokens_seen": 18921372, "step": 302 }, { "epoch": 1.004991680532446, "loss": 1.1891447305679321, "loss_ce": 0.002133029280230403, "loss_iou": 0.3671875, "loss_num": 0.0908203125, "loss_xval": 1.1875, "num_input_tokens_seen": 18921372, "step": 302 }, { "epoch": 1.0083194675540765, "grad_norm": 13.548688888549805, "learning_rate": 5e-06, "loss": 0.8687, "num_input_tokens_seen": 18982324, "step": 303 }, { "epoch": 1.0083194675540765, "loss": 0.7147096395492554, "loss_ce": 0.003283819416537881, "loss_iou": 0.1640625, "loss_num": 0.07666015625, "loss_xval": 0.7109375, "num_input_tokens_seen": 18982324, "step": 303 }, { "epoch": 1.0116472545757071, "grad_norm": 16.49581527709961, "learning_rate": 5e-06, "loss": 1.1634, "num_input_tokens_seen": 19044756, "step": 304 }, { "epoch": 1.0116472545757071, "loss": 1.4042516946792603, "loss_ce": 0.0009313594782724977, "loss_iou": 0.43359375, "loss_num": 0.107421875, "loss_xval": 1.40625, "num_input_tokens_seen": 19044756, "step": 304 }, { "epoch": 1.0149750415973378, "grad_norm": 26.810819625854492, "learning_rate": 5e-06, "loss": 0.9443, "num_input_tokens_seen": 19106748, "step": 305 }, { "epoch": 1.0149750415973378, "loss": 0.8466845750808716, "loss_ce": 0.002446266822516918, "loss_iou": 0.2314453125, "loss_num": 0.076171875, "loss_xval": 0.84375, "num_input_tokens_seen": 19106748, "step": 305 }, { "epoch": 1.0183028286189684, "grad_norm": 11.85273551940918, "learning_rate": 5e-06, "loss": 1.1287, "num_input_tokens_seen": 19169128, "step": 306 }, { "epoch": 1.0183028286189684, "loss": 1.0434765815734863, "loss_ce": 0.00026368399267084897, "loss_iou": 0.388671875, "loss_num": 0.053466796875, "loss_xval": 1.046875, "num_input_tokens_seen": 19169128, "step": 306 }, { "epoch": 1.021630615640599, "grad_norm": 14.573290824890137, "learning_rate": 5e-06, "loss": 1.0402, "num_input_tokens_seen": 19232736, "step": 307 }, { "epoch": 1.021630615640599, "loss": 1.0792515277862549, "loss_ce": 0.003812062554061413, "loss_iou": 0.36328125, "loss_num": 0.0703125, "loss_xval": 1.078125, "num_input_tokens_seen": 19232736, "step": 307 }, { "epoch": 1.0249584026622296, "grad_norm": 16.600948333740234, "learning_rate": 5e-06, "loss": 1.3143, "num_input_tokens_seen": 19294736, "step": 308 }, { "epoch": 1.0249584026622296, "loss": 1.2582886219024658, "loss_ce": 0.008776895701885223, "loss_iou": 0.39453125, "loss_num": 0.091796875, "loss_xval": 1.25, "num_input_tokens_seen": 19294736, "step": 308 }, { "epoch": 1.0282861896838602, "grad_norm": 11.475916862487793, "learning_rate": 5e-06, "loss": 0.8611, "num_input_tokens_seen": 19354768, "step": 309 }, { "epoch": 1.0282861896838602, "loss": 0.5999712944030762, "loss_ce": 0.002803354524075985, "loss_iou": 0.01251220703125, "loss_num": 0.11474609375, "loss_xval": 0.59765625, "num_input_tokens_seen": 19354768, "step": 309 }, { "epoch": 1.0316139767054908, "grad_norm": 16.11930274963379, "learning_rate": 5e-06, "loss": 1.2539, "num_input_tokens_seen": 19418164, "step": 310 }, { "epoch": 1.0316139767054908, "loss": 1.3940266370773315, "loss_ce": 0.0014484911225736141, "loss_iou": 0.416015625, "loss_num": 0.1123046875, "loss_xval": 1.390625, "num_input_tokens_seen": 19418164, "step": 310 }, { "epoch": 1.0349417637271214, "grad_norm": 25.261287689208984, "learning_rate": 5e-06, "loss": 1.2662, "num_input_tokens_seen": 19481684, "step": 311 }, { "epoch": 1.0349417637271214, "loss": 1.263882040977478, "loss_ce": 0.009487524628639221, "loss_iou": 0.4296875, "loss_num": 0.07958984375, "loss_xval": 1.2578125, "num_input_tokens_seen": 19481684, "step": 311 }, { "epoch": 1.038269550748752, "grad_norm": 11.63999080657959, "learning_rate": 5e-06, "loss": 1.2871, "num_input_tokens_seen": 19544500, "step": 312 }, { "epoch": 1.038269550748752, "loss": 1.305168628692627, "loss_ce": 0.0012135988799855113, "loss_iou": 0.4296875, "loss_num": 0.0888671875, "loss_xval": 1.3046875, "num_input_tokens_seen": 19544500, "step": 312 }, { "epoch": 1.0415973377703827, "grad_norm": 12.800460815429688, "learning_rate": 5e-06, "loss": 1.0853, "num_input_tokens_seen": 19609040, "step": 313 }, { "epoch": 1.0415973377703827, "loss": 1.041963815689087, "loss_ce": 0.0007040311465971172, "loss_iou": 0.34765625, "loss_num": 0.0693359375, "loss_xval": 1.0390625, "num_input_tokens_seen": 19609040, "step": 313 }, { "epoch": 1.0449251247920133, "grad_norm": 54.40293502807617, "learning_rate": 5e-06, "loss": 0.9638, "num_input_tokens_seen": 19671612, "step": 314 }, { "epoch": 1.0449251247920133, "loss": 0.9310311079025269, "loss_ce": 0.004761609248816967, "loss_iou": 0.3125, "loss_num": 0.060546875, "loss_xval": 0.92578125, "num_input_tokens_seen": 19671612, "step": 314 }, { "epoch": 1.0482529118136439, "grad_norm": 16.393136978149414, "learning_rate": 5e-06, "loss": 1.0914, "num_input_tokens_seen": 19733824, "step": 315 }, { "epoch": 1.0482529118136439, "loss": 1.1248711347579956, "loss_ce": 0.002312490250915289, "loss_iou": 0.3984375, "loss_num": 0.06494140625, "loss_xval": 1.125, "num_input_tokens_seen": 19733824, "step": 315 }, { "epoch": 1.0515806988352745, "grad_norm": 19.917030334472656, "learning_rate": 5e-06, "loss": 0.8806, "num_input_tokens_seen": 19794408, "step": 316 }, { "epoch": 1.0515806988352745, "loss": 0.5999534130096436, "loss_ce": 0.0005881565157324076, "loss_iou": 0.0, "loss_num": 0.1201171875, "loss_xval": 0.59765625, "num_input_tokens_seen": 19794408, "step": 316 }, { "epoch": 1.054908485856905, "grad_norm": 23.361522674560547, "learning_rate": 5e-06, "loss": 1.2846, "num_input_tokens_seen": 19857392, "step": 317 }, { "epoch": 1.054908485856905, "loss": 1.1950931549072266, "loss_ce": 0.0005131502402946353, "loss_iou": 0.357421875, "loss_num": 0.09619140625, "loss_xval": 1.1953125, "num_input_tokens_seen": 19857392, "step": 317 }, { "epoch": 1.0582362728785357, "grad_norm": 15.505134582519531, "learning_rate": 5e-06, "loss": 0.9283, "num_input_tokens_seen": 19921056, "step": 318 }, { "epoch": 1.0582362728785357, "loss": 1.0836975574493408, "loss_ce": 0.0006896366830915213, "loss_iou": 0.349609375, "loss_num": 0.07666015625, "loss_xval": 1.0859375, "num_input_tokens_seen": 19921056, "step": 318 }, { "epoch": 1.0615640599001663, "grad_norm": 15.914534568786621, "learning_rate": 5e-06, "loss": 0.8054, "num_input_tokens_seen": 19981520, "step": 319 }, { "epoch": 1.0615640599001663, "loss": 0.7295636534690857, "loss_ce": 0.004862701054662466, "loss_iou": 0.134765625, "loss_num": 0.09130859375, "loss_xval": 0.7265625, "num_input_tokens_seen": 19981520, "step": 319 }, { "epoch": 1.064891846921797, "grad_norm": 13.378053665161133, "learning_rate": 5e-06, "loss": 1.1706, "num_input_tokens_seen": 20043756, "step": 320 }, { "epoch": 1.064891846921797, "loss": 1.3541194200515747, "loss_ce": 0.0010921121574938297, "loss_iou": 0.48828125, "loss_num": 0.0751953125, "loss_xval": 1.3515625, "num_input_tokens_seen": 20043756, "step": 320 }, { "epoch": 1.0682196339434276, "grad_norm": 27.88616943359375, "learning_rate": 5e-06, "loss": 1.2254, "num_input_tokens_seen": 20107020, "step": 321 }, { "epoch": 1.0682196339434276, "loss": 1.242746114730835, "loss_ce": 0.0015352306654676795, "loss_iou": 0.40234375, "loss_num": 0.08740234375, "loss_xval": 1.2421875, "num_input_tokens_seen": 20107020, "step": 321 }, { "epoch": 1.0715474209650582, "grad_norm": 16.343183517456055, "learning_rate": 5e-06, "loss": 1.1786, "num_input_tokens_seen": 20169684, "step": 322 }, { "epoch": 1.0715474209650582, "loss": 1.4232938289642334, "loss_ce": 0.00923123024404049, "loss_iou": 0.404296875, "loss_num": 0.12109375, "loss_xval": 1.4140625, "num_input_tokens_seen": 20169684, "step": 322 }, { "epoch": 1.0748752079866888, "grad_norm": 19.044235229492188, "learning_rate": 5e-06, "loss": 1.1389, "num_input_tokens_seen": 20231040, "step": 323 }, { "epoch": 1.0748752079866888, "loss": 1.2635955810546875, "loss_ce": 0.0021209523547440767, "loss_iou": 0.419921875, "loss_num": 0.08447265625, "loss_xval": 1.2578125, "num_input_tokens_seen": 20231040, "step": 323 }, { "epoch": 1.0782029950083194, "grad_norm": 22.810884475708008, "learning_rate": 5e-06, "loss": 1.1854, "num_input_tokens_seen": 20294960, "step": 324 }, { "epoch": 1.0782029950083194, "loss": 1.3016592264175415, "loss_ce": 0.005028393119573593, "loss_iou": 0.427734375, "loss_num": 0.087890625, "loss_xval": 1.296875, "num_input_tokens_seen": 20294960, "step": 324 }, { "epoch": 1.08153078202995, "grad_norm": 26.073562622070312, "learning_rate": 5e-06, "loss": 1.0587, "num_input_tokens_seen": 20358040, "step": 325 }, { "epoch": 1.08153078202995, "loss": 1.235098123550415, "loss_ce": 0.001699719694443047, "loss_iou": 0.4140625, "loss_num": 0.08154296875, "loss_xval": 1.234375, "num_input_tokens_seen": 20358040, "step": 325 }, { "epoch": 1.0848585690515806, "grad_norm": 13.904311180114746, "learning_rate": 5e-06, "loss": 1.253, "num_input_tokens_seen": 20421232, "step": 326 }, { "epoch": 1.0848585690515806, "loss": 1.2734218835830688, "loss_ce": 0.001449216390028596, "loss_iou": 0.408203125, "loss_num": 0.09130859375, "loss_xval": 1.2734375, "num_input_tokens_seen": 20421232, "step": 326 }, { "epoch": 1.0881863560732112, "grad_norm": 15.164936065673828, "learning_rate": 5e-06, "loss": 1.1673, "num_input_tokens_seen": 20483820, "step": 327 }, { "epoch": 1.0881863560732112, "loss": 1.1625075340270996, "loss_ce": 0.0007643033168278635, "loss_iou": 0.390625, "loss_num": 0.076171875, "loss_xval": 1.1640625, "num_input_tokens_seen": 20483820, "step": 327 }, { "epoch": 1.0915141430948418, "grad_norm": 24.25714874267578, "learning_rate": 5e-06, "loss": 1.2235, "num_input_tokens_seen": 20547208, "step": 328 }, { "epoch": 1.0915141430948418, "loss": 1.3560025691986084, "loss_ce": 0.0029753113631159067, "loss_iou": 0.447265625, "loss_num": 0.09130859375, "loss_xval": 1.3515625, "num_input_tokens_seen": 20547208, "step": 328 }, { "epoch": 1.0948419301164725, "grad_norm": 27.510494232177734, "learning_rate": 5e-06, "loss": 0.8721, "num_input_tokens_seen": 20610188, "step": 329 }, { "epoch": 1.0948419301164725, "loss": 0.7758044600486755, "loss_ce": 0.00029176787938922644, "loss_iou": 0.27734375, "loss_num": 0.043701171875, "loss_xval": 0.77734375, "num_input_tokens_seen": 20610188, "step": 329 }, { "epoch": 1.098169717138103, "grad_norm": 162.73048400878906, "learning_rate": 5e-06, "loss": 1.1872, "num_input_tokens_seen": 20674032, "step": 330 }, { "epoch": 1.098169717138103, "loss": 0.9909277558326721, "loss_ce": 0.0016699021216481924, "loss_iou": 0.357421875, "loss_num": 0.0546875, "loss_xval": 0.98828125, "num_input_tokens_seen": 20674032, "step": 330 }, { "epoch": 1.1014975041597337, "grad_norm": 10.696892738342285, "learning_rate": 5e-06, "loss": 1.1888, "num_input_tokens_seen": 20735628, "step": 331 }, { "epoch": 1.1014975041597337, "loss": 1.2723336219787598, "loss_ce": 0.0003609945997595787, "loss_iou": 0.396484375, "loss_num": 0.095703125, "loss_xval": 1.2734375, "num_input_tokens_seen": 20735628, "step": 331 }, { "epoch": 1.1048252911813643, "grad_norm": 51.85942459106445, "learning_rate": 5e-06, "loss": 1.2714, "num_input_tokens_seen": 20799440, "step": 332 }, { "epoch": 1.1048252911813643, "loss": 1.278044581413269, "loss_ce": 0.001189130125567317, "loss_iou": 0.388671875, "loss_num": 0.10009765625, "loss_xval": 1.2734375, "num_input_tokens_seen": 20799440, "step": 332 }, { "epoch": 1.108153078202995, "grad_norm": 10.22545051574707, "learning_rate": 5e-06, "loss": 0.7951, "num_input_tokens_seen": 20861468, "step": 333 }, { "epoch": 1.108153078202995, "loss": 1.0232528448104858, "loss_ce": 0.000547763193026185, "loss_iou": 0.310546875, "loss_num": 0.08056640625, "loss_xval": 1.0234375, "num_input_tokens_seen": 20861468, "step": 333 }, { "epoch": 1.1114808652246255, "grad_norm": 8.053681373596191, "learning_rate": 5e-06, "loss": 1.1034, "num_input_tokens_seen": 20923164, "step": 334 }, { "epoch": 1.1114808652246255, "loss": 1.012281060218811, "loss_ce": 0.0009285883279517293, "loss_iou": 0.302734375, "loss_num": 0.08154296875, "loss_xval": 1.0078125, "num_input_tokens_seen": 20923164, "step": 334 }, { "epoch": 1.1148086522462561, "grad_norm": 15.463237762451172, "learning_rate": 5e-06, "loss": 1.1601, "num_input_tokens_seen": 20987752, "step": 335 }, { "epoch": 1.1148086522462561, "loss": 0.9524677395820618, "loss_ce": 0.000807545380666852, "loss_iou": 0.3203125, "loss_num": 0.062255859375, "loss_xval": 0.953125, "num_input_tokens_seen": 20987752, "step": 335 }, { "epoch": 1.1181364392678868, "grad_norm": 10.856858253479004, "learning_rate": 5e-06, "loss": 1.1705, "num_input_tokens_seen": 21050632, "step": 336 }, { "epoch": 1.1181364392678868, "loss": 1.1689019203186035, "loss_ce": 0.003130426863208413, "loss_iou": 0.396484375, "loss_num": 0.07421875, "loss_xval": 1.1640625, "num_input_tokens_seen": 21050632, "step": 336 }, { "epoch": 1.1214642262895174, "grad_norm": 13.99282455444336, "learning_rate": 5e-06, "loss": 1.4048, "num_input_tokens_seen": 21112928, "step": 337 }, { "epoch": 1.1214642262895174, "loss": 1.4012787342071533, "loss_ce": 0.0016206144355237484, "loss_iou": 0.423828125, "loss_num": 0.11083984375, "loss_xval": 1.3984375, "num_input_tokens_seen": 21112928, "step": 337 }, { "epoch": 1.124792013311148, "grad_norm": 16.16678237915039, "learning_rate": 5e-06, "loss": 1.147, "num_input_tokens_seen": 21176408, "step": 338 }, { "epoch": 1.124792013311148, "loss": 0.9964649081230164, "loss_ce": 0.00281256134621799, "loss_iou": 0.26171875, "loss_num": 0.09423828125, "loss_xval": 0.9921875, "num_input_tokens_seen": 21176408, "step": 338 }, { "epoch": 1.1281198003327786, "grad_norm": 16.422903060913086, "learning_rate": 5e-06, "loss": 1.1554, "num_input_tokens_seen": 21238804, "step": 339 }, { "epoch": 1.1281198003327786, "loss": 1.3646725416183472, "loss_ce": 0.006762400269508362, "loss_iou": 0.345703125, "loss_num": 0.1337890625, "loss_xval": 1.359375, "num_input_tokens_seen": 21238804, "step": 339 }, { "epoch": 1.1314475873544092, "grad_norm": 10.378382682800293, "learning_rate": 5e-06, "loss": 1.0336, "num_input_tokens_seen": 21300804, "step": 340 }, { "epoch": 1.1314475873544092, "loss": 1.2877411842346191, "loss_ce": 0.03554388880729675, "loss_iou": 0.41015625, "loss_num": 0.0859375, "loss_xval": 1.25, "num_input_tokens_seen": 21300804, "step": 340 }, { "epoch": 1.1347753743760398, "grad_norm": 18.60308074951172, "learning_rate": 5e-06, "loss": 1.0494, "num_input_tokens_seen": 21364188, "step": 341 }, { "epoch": 1.1347753743760398, "loss": 1.0969760417938232, "loss_ce": 0.0027377367950975895, "loss_iou": 0.39453125, "loss_num": 0.06103515625, "loss_xval": 1.09375, "num_input_tokens_seen": 21364188, "step": 341 }, { "epoch": 1.1381031613976704, "grad_norm": 20.462738037109375, "learning_rate": 5e-06, "loss": 1.1857, "num_input_tokens_seen": 21426420, "step": 342 }, { "epoch": 1.1381031613976704, "loss": 1.0656057596206665, "loss_ce": 0.0026175249367952347, "loss_iou": 0.3203125, "loss_num": 0.08447265625, "loss_xval": 1.0625, "num_input_tokens_seen": 21426420, "step": 342 }, { "epoch": 1.1414309484193013, "grad_norm": 21.535289764404297, "learning_rate": 5e-06, "loss": 1.1397, "num_input_tokens_seen": 21489396, "step": 343 }, { "epoch": 1.1414309484193013, "loss": 1.322596549987793, "loss_ce": 0.0036267684772610664, "loss_iou": 0.421875, "loss_num": 0.09521484375, "loss_xval": 1.3203125, "num_input_tokens_seen": 21489396, "step": 343 }, { "epoch": 1.1447587354409319, "grad_norm": 17.741491317749023, "learning_rate": 5e-06, "loss": 1.0668, "num_input_tokens_seen": 21553004, "step": 344 }, { "epoch": 1.1447587354409319, "loss": 1.1139004230499268, "loss_ce": 0.0011073811911046505, "loss_iou": 0.36328125, "loss_num": 0.07763671875, "loss_xval": 1.109375, "num_input_tokens_seen": 21553004, "step": 344 }, { "epoch": 1.1480865224625625, "grad_norm": 16.684860229492188, "learning_rate": 5e-06, "loss": 1.2185, "num_input_tokens_seen": 21616504, "step": 345 }, { "epoch": 1.1480865224625625, "loss": 1.2288806438446045, "loss_ce": 0.002318133134394884, "loss_iou": 0.39453125, "loss_num": 0.087890625, "loss_xval": 1.2265625, "num_input_tokens_seen": 21616504, "step": 345 }, { "epoch": 1.151414309484193, "grad_norm": 13.319676399230957, "learning_rate": 5e-06, "loss": 0.9034, "num_input_tokens_seen": 21679532, "step": 346 }, { "epoch": 1.151414309484193, "loss": 0.8316326141357422, "loss_ce": 0.002897227182984352, "loss_iou": 0.267578125, "loss_num": 0.058349609375, "loss_xval": 0.828125, "num_input_tokens_seen": 21679532, "step": 346 }, { "epoch": 1.1547420965058237, "grad_norm": 11.575480461120605, "learning_rate": 5e-06, "loss": 1.139, "num_input_tokens_seen": 21742168, "step": 347 }, { "epoch": 1.1547420965058237, "loss": 0.9771150946617126, "loss_ce": 0.0007967862184159458, "loss_iou": 0.314453125, "loss_num": 0.0693359375, "loss_xval": 0.9765625, "num_input_tokens_seen": 21742168, "step": 347 }, { "epoch": 1.1580698835274543, "grad_norm": 10.588814735412598, "learning_rate": 5e-06, "loss": 0.9637, "num_input_tokens_seen": 21803688, "step": 348 }, { "epoch": 1.1580698835274543, "loss": 0.6981990933418274, "loss_ce": 0.00020107255841139704, "loss_iou": 0.1669921875, "loss_num": 0.07275390625, "loss_xval": 0.69921875, "num_input_tokens_seen": 21803688, "step": 348 }, { "epoch": 1.161397670549085, "grad_norm": 13.000744819641113, "learning_rate": 5e-06, "loss": 0.7941, "num_input_tokens_seen": 21867184, "step": 349 }, { "epoch": 1.161397670549085, "loss": 1.015153169631958, "loss_ce": 0.0019696177914738655, "loss_iou": 0.27734375, "loss_num": 0.09130859375, "loss_xval": 1.015625, "num_input_tokens_seen": 21867184, "step": 349 }, { "epoch": 1.1647254575707155, "grad_norm": 8.500694274902344, "learning_rate": 5e-06, "loss": 0.9189, "num_input_tokens_seen": 21930068, "step": 350 }, { "epoch": 1.1647254575707155, "loss": 0.8839759826660156, "loss_ce": 0.004825552459806204, "loss_iou": 0.296875, "loss_num": 0.05712890625, "loss_xval": 0.87890625, "num_input_tokens_seen": 21930068, "step": 350 }, { "epoch": 1.1680532445923462, "grad_norm": 22.050308227539062, "learning_rate": 5e-06, "loss": 0.85, "num_input_tokens_seen": 21992112, "step": 351 }, { "epoch": 1.1680532445923462, "loss": 0.871414065361023, "loss_ce": 0.005691413767635822, "loss_iou": 0.17578125, "loss_num": 0.10302734375, "loss_xval": 0.8671875, "num_input_tokens_seen": 21992112, "step": 351 }, { "epoch": 1.1713810316139768, "grad_norm": 15.947305679321289, "learning_rate": 5e-06, "loss": 0.9005, "num_input_tokens_seen": 22055200, "step": 352 }, { "epoch": 1.1713810316139768, "loss": 0.8017796277999878, "loss_ce": 0.00209699384868145, "loss_iou": 0.232421875, "loss_num": 0.06689453125, "loss_xval": 0.80078125, "num_input_tokens_seen": 22055200, "step": 352 }, { "epoch": 1.1747088186356074, "grad_norm": 15.728245735168457, "learning_rate": 5e-06, "loss": 0.9721, "num_input_tokens_seen": 22117372, "step": 353 }, { "epoch": 1.1747088186356074, "loss": 0.8560371398925781, "loss_ce": 0.026935596019029617, "loss_iou": 0.2470703125, "loss_num": 0.0673828125, "loss_xval": 0.828125, "num_input_tokens_seen": 22117372, "step": 353 }, { "epoch": 1.178036605657238, "grad_norm": 9.21013355255127, "learning_rate": 5e-06, "loss": 1.0148, "num_input_tokens_seen": 22180280, "step": 354 }, { "epoch": 1.178036605657238, "loss": 1.0702283382415771, "loss_ce": 0.0018690190045163035, "loss_iou": 0.337890625, "loss_num": 0.0791015625, "loss_xval": 1.0703125, "num_input_tokens_seen": 22180280, "step": 354 }, { "epoch": 1.1813643926788686, "grad_norm": 13.137001991271973, "learning_rate": 5e-06, "loss": 0.9983, "num_input_tokens_seen": 22243800, "step": 355 }, { "epoch": 1.1813643926788686, "loss": 0.9866027235984802, "loss_ce": 0.002227720571681857, "loss_iou": 0.34765625, "loss_num": 0.057861328125, "loss_xval": 0.984375, "num_input_tokens_seen": 22243800, "step": 355 }, { "epoch": 1.1846921797004992, "grad_norm": 13.438755989074707, "learning_rate": 5e-06, "loss": 0.9054, "num_input_tokens_seen": 22306152, "step": 356 }, { "epoch": 1.1846921797004992, "loss": 0.93604576587677, "loss_ce": 0.0034285818692296743, "loss_iou": 0.3203125, "loss_num": 0.05908203125, "loss_xval": 0.93359375, "num_input_tokens_seen": 22306152, "step": 356 }, { "epoch": 1.1880199667221298, "grad_norm": 9.37387466430664, "learning_rate": 5e-06, "loss": 0.9098, "num_input_tokens_seen": 22368308, "step": 357 }, { "epoch": 1.1880199667221298, "loss": 0.8839547038078308, "loss_ce": 0.0011421950766816735, "loss_iou": 0.22265625, "loss_num": 0.08740234375, "loss_xval": 0.8828125, "num_input_tokens_seen": 22368308, "step": 357 }, { "epoch": 1.1913477537437605, "grad_norm": 15.862778663635254, "learning_rate": 5e-06, "loss": 0.9651, "num_input_tokens_seen": 22430760, "step": 358 }, { "epoch": 1.1913477537437605, "loss": 0.8542115688323975, "loss_ce": 0.0004518293426372111, "loss_iou": 0.2734375, "loss_num": 0.061767578125, "loss_xval": 0.85546875, "num_input_tokens_seen": 22430760, "step": 358 }, { "epoch": 1.194675540765391, "grad_norm": 13.395066261291504, "learning_rate": 5e-06, "loss": 1.2366, "num_input_tokens_seen": 22494188, "step": 359 }, { "epoch": 1.194675540765391, "loss": 1.1658010482788086, "loss_ce": 0.0014943606220185757, "loss_iou": 0.37890625, "loss_num": 0.08154296875, "loss_xval": 1.1640625, "num_input_tokens_seen": 22494188, "step": 359 }, { "epoch": 1.1980033277870217, "grad_norm": 17.724477767944336, "learning_rate": 5e-06, "loss": 1.1109, "num_input_tokens_seen": 22557396, "step": 360 }, { "epoch": 1.1980033277870217, "loss": 0.9321660995483398, "loss_ce": 0.0002813843311741948, "loss_iou": 0.259765625, "loss_num": 0.08251953125, "loss_xval": 0.93359375, "num_input_tokens_seen": 22557396, "step": 360 }, { "epoch": 1.2013311148086523, "grad_norm": 15.220552444458008, "learning_rate": 5e-06, "loss": 1.3183, "num_input_tokens_seen": 22619512, "step": 361 }, { "epoch": 1.2013311148086523, "loss": 1.3823212385177612, "loss_ce": 0.002438361756503582, "loss_iou": 0.48828125, "loss_num": 0.08056640625, "loss_xval": 1.3828125, "num_input_tokens_seen": 22619512, "step": 361 }, { "epoch": 1.204658901830283, "grad_norm": 13.84691047668457, "learning_rate": 5e-06, "loss": 0.9889, "num_input_tokens_seen": 22683192, "step": 362 }, { "epoch": 1.204658901830283, "loss": 1.0336413383483887, "loss_ce": 0.005320955533534288, "loss_iou": 0.326171875, "loss_num": 0.07568359375, "loss_xval": 1.03125, "num_input_tokens_seen": 22683192, "step": 362 }, { "epoch": 1.2079866888519135, "grad_norm": 8.67003345489502, "learning_rate": 5e-06, "loss": 1.0009, "num_input_tokens_seen": 22746804, "step": 363 }, { "epoch": 1.2079866888519135, "loss": 0.8072459697723389, "loss_ce": 0.001337698893621564, "loss_iou": 0.2265625, "loss_num": 0.0703125, "loss_xval": 0.8046875, "num_input_tokens_seen": 22746804, "step": 363 }, { "epoch": 1.2113144758735441, "grad_norm": 19.87843894958496, "learning_rate": 5e-06, "loss": 1.1418, "num_input_tokens_seen": 22809236, "step": 364 }, { "epoch": 1.2113144758735441, "loss": 1.1470714807510376, "loss_ce": 0.001075398176908493, "loss_iou": 0.36328125, "loss_num": 0.083984375, "loss_xval": 1.1484375, "num_input_tokens_seen": 22809236, "step": 364 }, { "epoch": 1.2146422628951747, "grad_norm": 19.612537384033203, "learning_rate": 5e-06, "loss": 1.0437, "num_input_tokens_seen": 22871224, "step": 365 }, { "epoch": 1.2146422628951747, "loss": 1.0723521709442139, "loss_ce": 0.0015513792168349028, "loss_iou": 0.37109375, "loss_num": 0.06591796875, "loss_xval": 1.0703125, "num_input_tokens_seen": 22871224, "step": 365 }, { "epoch": 1.2179700499168054, "grad_norm": 13.617435455322266, "learning_rate": 5e-06, "loss": 1.1042, "num_input_tokens_seen": 22934344, "step": 366 }, { "epoch": 1.2179700499168054, "loss": 1.1154537200927734, "loss_ce": 0.0016841854667291045, "loss_iou": 0.3671875, "loss_num": 0.07568359375, "loss_xval": 1.1171875, "num_input_tokens_seen": 22934344, "step": 366 }, { "epoch": 1.221297836938436, "grad_norm": 8.669763565063477, "learning_rate": 5e-06, "loss": 0.6066, "num_input_tokens_seen": 22996164, "step": 367 }, { "epoch": 1.221297836938436, "loss": 0.5188974142074585, "loss_ce": 0.005957929417490959, "loss_iou": 0.1201171875, "loss_num": 0.0546875, "loss_xval": 0.51171875, "num_input_tokens_seen": 22996164, "step": 367 }, { "epoch": 1.2246256239600666, "grad_norm": 12.283041000366211, "learning_rate": 5e-06, "loss": 0.9514, "num_input_tokens_seen": 23058328, "step": 368 }, { "epoch": 1.2246256239600666, "loss": 1.0867336988449097, "loss_ce": 0.0010403767228126526, "loss_iou": 0.3046875, "loss_num": 0.09521484375, "loss_xval": 1.0859375, "num_input_tokens_seen": 23058328, "step": 368 }, { "epoch": 1.2279534109816972, "grad_norm": 25.49207305908203, "learning_rate": 5e-06, "loss": 1.1151, "num_input_tokens_seen": 23120736, "step": 369 }, { "epoch": 1.2279534109816972, "loss": 1.349541425704956, "loss_ce": 0.0018851247150450945, "loss_iou": 0.455078125, "loss_num": 0.08740234375, "loss_xval": 1.34375, "num_input_tokens_seen": 23120736, "step": 369 }, { "epoch": 1.2312811980033278, "grad_norm": 20.782428741455078, "learning_rate": 5e-06, "loss": 1.2506, "num_input_tokens_seen": 23182684, "step": 370 }, { "epoch": 1.2312811980033278, "loss": 1.2768054008483887, "loss_ce": 0.00043815511162392795, "loss_iou": 0.37890625, "loss_num": 0.103515625, "loss_xval": 1.2734375, "num_input_tokens_seen": 23182684, "step": 370 }, { "epoch": 1.2346089850249584, "grad_norm": 12.254620552062988, "learning_rate": 5e-06, "loss": 1.2855, "num_input_tokens_seen": 23244864, "step": 371 }, { "epoch": 1.2346089850249584, "loss": 1.1182758808135986, "loss_ce": 0.002309112809598446, "loss_iou": 0.349609375, "loss_num": 0.08349609375, "loss_xval": 1.1171875, "num_input_tokens_seen": 23244864, "step": 371 }, { "epoch": 1.237936772046589, "grad_norm": 20.859880447387695, "learning_rate": 5e-06, "loss": 1.0792, "num_input_tokens_seen": 23308204, "step": 372 }, { "epoch": 1.237936772046589, "loss": 1.1883625984191895, "loss_ce": 0.00037434539990499616, "loss_iou": 0.365234375, "loss_num": 0.091796875, "loss_xval": 1.1875, "num_input_tokens_seen": 23308204, "step": 372 }, { "epoch": 1.2412645590682196, "grad_norm": 10.013941764831543, "learning_rate": 5e-06, "loss": 0.9596, "num_input_tokens_seen": 23371756, "step": 373 }, { "epoch": 1.2412645590682196, "loss": 1.0599653720855713, "loss_ce": 0.0008833592291921377, "loss_iou": 0.35546875, "loss_num": 0.06982421875, "loss_xval": 1.0625, "num_input_tokens_seen": 23371756, "step": 373 }, { "epoch": 1.2445923460898503, "grad_norm": 15.680611610412598, "learning_rate": 5e-06, "loss": 1.147, "num_input_tokens_seen": 23434380, "step": 374 }, { "epoch": 1.2445923460898503, "loss": 1.3000984191894531, "loss_ce": 0.0020027763675898314, "loss_iou": 0.349609375, "loss_num": 0.11962890625, "loss_xval": 1.296875, "num_input_tokens_seen": 23434380, "step": 374 }, { "epoch": 1.2479201331114809, "grad_norm": 55.765045166015625, "learning_rate": 5e-06, "loss": 0.9942, "num_input_tokens_seen": 23496936, "step": 375 }, { "epoch": 1.2479201331114809, "loss": 0.8713576793670654, "loss_ce": 0.0001418392639607191, "loss_iou": 0.318359375, "loss_num": 0.046630859375, "loss_xval": 0.87109375, "num_input_tokens_seen": 23496936, "step": 375 }, { "epoch": 1.2512479201331115, "grad_norm": 11.986567497253418, "learning_rate": 5e-06, "loss": 1.1553, "num_input_tokens_seen": 23561228, "step": 376 }, { "epoch": 1.2512479201331115, "loss": 1.2353582382202148, "loss_ce": 0.008063295856118202, "loss_iou": 0.41796875, "loss_num": 0.078125, "loss_xval": 1.2265625, "num_input_tokens_seen": 23561228, "step": 376 }, { "epoch": 1.254575707154742, "grad_norm": 15.518547058105469, "learning_rate": 5e-06, "loss": 1.1645, "num_input_tokens_seen": 23623508, "step": 377 }, { "epoch": 1.254575707154742, "loss": 1.1245479583740234, "loss_ce": 0.011266733519732952, "loss_iou": 0.416015625, "loss_num": 0.056396484375, "loss_xval": 1.109375, "num_input_tokens_seen": 23623508, "step": 377 }, { "epoch": 1.2579034941763727, "grad_norm": 12.487281799316406, "learning_rate": 5e-06, "loss": 1.1126, "num_input_tokens_seen": 23686012, "step": 378 }, { "epoch": 1.2579034941763727, "loss": 1.173827052116394, "loss_ce": 0.0009754931088536978, "loss_iou": 0.3359375, "loss_num": 0.1005859375, "loss_xval": 1.171875, "num_input_tokens_seen": 23686012, "step": 378 }, { "epoch": 1.2612312811980033, "grad_norm": 14.35226821899414, "learning_rate": 5e-06, "loss": 1.0547, "num_input_tokens_seen": 23749124, "step": 379 }, { "epoch": 1.2612312811980033, "loss": 1.0610747337341309, "loss_ce": 0.0007720579742453992, "loss_iou": 0.361328125, "loss_num": 0.0673828125, "loss_xval": 1.0625, "num_input_tokens_seen": 23749124, "step": 379 }, { "epoch": 1.264559068219634, "grad_norm": 24.53921890258789, "learning_rate": 5e-06, "loss": 1.2042, "num_input_tokens_seen": 23811144, "step": 380 }, { "epoch": 1.264559068219634, "loss": 1.225796103477478, "loss_ce": 0.0036280876956880093, "loss_iou": 0.388671875, "loss_num": 0.08984375, "loss_xval": 1.21875, "num_input_tokens_seen": 23811144, "step": 380 }, { "epoch": 1.2678868552412645, "grad_norm": 13.439510345458984, "learning_rate": 5e-06, "loss": 0.9866, "num_input_tokens_seen": 23874480, "step": 381 }, { "epoch": 1.2678868552412645, "loss": 0.8618694543838501, "loss_ce": 0.0004192190826870501, "loss_iou": 0.283203125, "loss_num": 0.05859375, "loss_xval": 0.86328125, "num_input_tokens_seen": 23874480, "step": 381 }, { "epoch": 1.2712146422628952, "grad_norm": 16.41143035888672, "learning_rate": 5e-06, "loss": 0.9888, "num_input_tokens_seen": 23936680, "step": 382 }, { "epoch": 1.2712146422628952, "loss": 0.9557693004608154, "loss_ce": 0.00514674698933959, "loss_iou": 0.2490234375, "loss_num": 0.09033203125, "loss_xval": 0.94921875, "num_input_tokens_seen": 23936680, "step": 382 }, { "epoch": 1.2745424292845258, "grad_norm": 20.497072219848633, "learning_rate": 5e-06, "loss": 0.873, "num_input_tokens_seen": 23997596, "step": 383 }, { "epoch": 1.2745424292845258, "loss": 0.9353244304656982, "loss_ce": 0.0036837609950453043, "loss_iou": 0.2890625, "loss_num": 0.07080078125, "loss_xval": 0.9296875, "num_input_tokens_seen": 23997596, "step": 383 }, { "epoch": 1.2778702163061564, "grad_norm": 16.8009033203125, "learning_rate": 5e-06, "loss": 0.836, "num_input_tokens_seen": 24059808, "step": 384 }, { "epoch": 1.2778702163061564, "loss": 0.957974910736084, "loss_ce": 0.0016761153237894177, "loss_iou": 0.306640625, "loss_num": 0.068359375, "loss_xval": 0.95703125, "num_input_tokens_seen": 24059808, "step": 384 }, { "epoch": 1.281198003327787, "grad_norm": 16.275964736938477, "learning_rate": 5e-06, "loss": 0.9754, "num_input_tokens_seen": 24123296, "step": 385 }, { "epoch": 1.281198003327787, "loss": 1.200371503829956, "loss_ce": 0.0004203752614557743, "loss_iou": 0.37109375, "loss_num": 0.09130859375, "loss_xval": 1.203125, "num_input_tokens_seen": 24123296, "step": 385 }, { "epoch": 1.2845257903494176, "grad_norm": 15.432588577270508, "learning_rate": 5e-06, "loss": 1.1704, "num_input_tokens_seen": 24186340, "step": 386 }, { "epoch": 1.2845257903494176, "loss": 1.2510305643081665, "loss_ce": 0.006401653401553631, "loss_iou": 0.453125, "loss_num": 0.068359375, "loss_xval": 1.2421875, "num_input_tokens_seen": 24186340, "step": 386 }, { "epoch": 1.2878535773710482, "grad_norm": 12.922508239746094, "learning_rate": 5e-06, "loss": 1.2301, "num_input_tokens_seen": 24250244, "step": 387 }, { "epoch": 1.2878535773710482, "loss": 1.315189242362976, "loss_ce": 0.0007360831368714571, "loss_iou": 0.486328125, "loss_num": 0.068359375, "loss_xval": 1.3125, "num_input_tokens_seen": 24250244, "step": 387 }, { "epoch": 1.2911813643926788, "grad_norm": 16.12157440185547, "learning_rate": 5e-06, "loss": 1.0807, "num_input_tokens_seen": 24312700, "step": 388 }, { "epoch": 1.2911813643926788, "loss": 0.9249197840690613, "loss_ce": 0.00011512526543810964, "loss_iou": 0.29296875, "loss_num": 0.0673828125, "loss_xval": 0.92578125, "num_input_tokens_seen": 24312700, "step": 388 }, { "epoch": 1.2945091514143094, "grad_norm": 15.61776351928711, "learning_rate": 5e-06, "loss": 1.206, "num_input_tokens_seen": 24372944, "step": 389 }, { "epoch": 1.2945091514143094, "loss": 1.2747079133987427, "loss_ce": 0.00664151506498456, "loss_iou": 0.291015625, "loss_num": 0.1376953125, "loss_xval": 1.265625, "num_input_tokens_seen": 24372944, "step": 389 }, { "epoch": 1.29783693843594, "grad_norm": 10.26531982421875, "learning_rate": 5e-06, "loss": 1.1097, "num_input_tokens_seen": 24436248, "step": 390 }, { "epoch": 1.29783693843594, "loss": 1.227935791015625, "loss_ce": 0.0003966076474171132, "loss_iou": 0.416015625, "loss_num": 0.0791015625, "loss_xval": 1.2265625, "num_input_tokens_seen": 24436248, "step": 390 }, { "epoch": 1.3011647254575707, "grad_norm": 10.401841163635254, "learning_rate": 5e-06, "loss": 1.0097, "num_input_tokens_seen": 24498940, "step": 391 }, { "epoch": 1.3011647254575707, "loss": 1.0491610765457153, "loss_ce": 0.004727460443973541, "loss_iou": 0.314453125, "loss_num": 0.08349609375, "loss_xval": 1.046875, "num_input_tokens_seen": 24498940, "step": 391 }, { "epoch": 1.3044925124792013, "grad_norm": 17.991519927978516, "learning_rate": 5e-06, "loss": 1.1516, "num_input_tokens_seen": 24563372, "step": 392 }, { "epoch": 1.3044925124792013, "loss": 1.063659906387329, "loss_ce": 0.0001832506968639791, "loss_iou": 0.357421875, "loss_num": 0.0693359375, "loss_xval": 1.0625, "num_input_tokens_seen": 24563372, "step": 392 }, { "epoch": 1.307820299500832, "grad_norm": 16.1484375, "learning_rate": 5e-06, "loss": 0.8481, "num_input_tokens_seen": 24625780, "step": 393 }, { "epoch": 1.307820299500832, "loss": 0.6541198492050171, "loss_ce": 6.708937871735543e-05, "loss_iou": 0.279296875, "loss_num": 0.018798828125, "loss_xval": 0.65234375, "num_input_tokens_seen": 24625780, "step": 393 }, { "epoch": 1.3111480865224625, "grad_norm": 15.9221773147583, "learning_rate": 5e-06, "loss": 0.9679, "num_input_tokens_seen": 24687492, "step": 394 }, { "epoch": 1.3111480865224625, "loss": 1.267278790473938, "loss_ce": 0.0021421266719698906, "loss_iou": 0.40625, "loss_num": 0.09033203125, "loss_xval": 1.265625, "num_input_tokens_seen": 24687492, "step": 394 }, { "epoch": 1.3144758735440931, "grad_norm": 23.325969696044922, "learning_rate": 5e-06, "loss": 1.177, "num_input_tokens_seen": 24752340, "step": 395 }, { "epoch": 1.3144758735440931, "loss": 1.2171975374221802, "loss_ce": 0.001377263804897666, "loss_iou": 0.431640625, "loss_num": 0.07080078125, "loss_xval": 1.21875, "num_input_tokens_seen": 24752340, "step": 395 }, { "epoch": 1.3178036605657237, "grad_norm": 43.90242004394531, "learning_rate": 5e-06, "loss": 1.1792, "num_input_tokens_seen": 24814652, "step": 396 }, { "epoch": 1.3178036605657237, "loss": 1.2642714977264404, "loss_ce": 0.0020644143223762512, "loss_iou": 0.416015625, "loss_num": 0.08642578125, "loss_xval": 1.265625, "num_input_tokens_seen": 24814652, "step": 396 }, { "epoch": 1.3211314475873543, "grad_norm": 45.1225700378418, "learning_rate": 5e-06, "loss": 1.1999, "num_input_tokens_seen": 24878016, "step": 397 }, { "epoch": 1.3211314475873543, "loss": 1.1546871662139893, "loss_ce": 0.003564041806384921, "loss_iou": 0.388671875, "loss_num": 0.0751953125, "loss_xval": 1.1484375, "num_input_tokens_seen": 24878016, "step": 397 }, { "epoch": 1.324459234608985, "grad_norm": 19.703289031982422, "learning_rate": 5e-06, "loss": 1.1245, "num_input_tokens_seen": 24939340, "step": 398 }, { "epoch": 1.324459234608985, "loss": 0.9508374333381653, "loss_ce": 0.000153833330841735, "loss_iou": 0.3203125, "loss_num": 0.06201171875, "loss_xval": 0.94921875, "num_input_tokens_seen": 24939340, "step": 398 }, { "epoch": 1.3277870216306156, "grad_norm": 11.191904067993164, "learning_rate": 5e-06, "loss": 0.963, "num_input_tokens_seen": 25002132, "step": 399 }, { "epoch": 1.3277870216306156, "loss": 0.7988940477371216, "loss_ce": 0.006901869084686041, "loss_iou": 0.265625, "loss_num": 0.05224609375, "loss_xval": 0.79296875, "num_input_tokens_seen": 25002132, "step": 399 }, { "epoch": 1.3311148086522462, "grad_norm": 10.622575759887695, "learning_rate": 5e-06, "loss": 1.1994, "num_input_tokens_seen": 25066044, "step": 400 }, { "epoch": 1.3311148086522462, "loss": 1.0515964031219482, "loss_ce": 0.0037447987124323845, "loss_iou": 0.353515625, "loss_num": 0.068359375, "loss_xval": 1.046875, "num_input_tokens_seen": 25066044, "step": 400 }, { "epoch": 1.3344425956738768, "grad_norm": 24.427770614624023, "learning_rate": 5e-06, "loss": 0.9622, "num_input_tokens_seen": 25128340, "step": 401 }, { "epoch": 1.3344425956738768, "loss": 0.9643534421920776, "loss_ce": 0.002195254433900118, "loss_iou": 0.216796875, "loss_num": 0.10595703125, "loss_xval": 0.9609375, "num_input_tokens_seen": 25128340, "step": 401 }, { "epoch": 1.3377703826955074, "grad_norm": 10.355712890625, "learning_rate": 5e-06, "loss": 1.0325, "num_input_tokens_seen": 25192192, "step": 402 }, { "epoch": 1.3377703826955074, "loss": 1.0074443817138672, "loss_ce": 0.001585077028721571, "loss_iou": 0.33203125, "loss_num": 0.06884765625, "loss_xval": 1.0078125, "num_input_tokens_seen": 25192192, "step": 402 }, { "epoch": 1.341098169717138, "grad_norm": 21.185884475708008, "learning_rate": 5e-06, "loss": 1.0896, "num_input_tokens_seen": 25255636, "step": 403 }, { "epoch": 1.341098169717138, "loss": 0.9336959719657898, "loss_ce": 0.00144498934969306, "loss_iou": 0.345703125, "loss_num": 0.04833984375, "loss_xval": 0.93359375, "num_input_tokens_seen": 25255636, "step": 403 }, { "epoch": 1.3444259567387689, "grad_norm": 9.39505386352539, "learning_rate": 5e-06, "loss": 1.0355, "num_input_tokens_seen": 25317992, "step": 404 }, { "epoch": 1.3444259567387689, "loss": 1.0250355005264282, "loss_ce": 0.00013315524847712368, "loss_iou": 0.353515625, "loss_num": 0.0634765625, "loss_xval": 1.0234375, "num_input_tokens_seen": 25317992, "step": 404 }, { "epoch": 1.3477537437603995, "grad_norm": 20.77029037475586, "learning_rate": 5e-06, "loss": 1.1133, "num_input_tokens_seen": 25380772, "step": 405 }, { "epoch": 1.3477537437603995, "loss": 1.0792059898376465, "loss_ce": 0.011090747080743313, "loss_iou": 0.296875, "loss_num": 0.09521484375, "loss_xval": 1.0703125, "num_input_tokens_seen": 25380772, "step": 405 }, { "epoch": 1.35108153078203, "grad_norm": 31.16495132446289, "learning_rate": 5e-06, "loss": 1.1511, "num_input_tokens_seen": 25445244, "step": 406 }, { "epoch": 1.35108153078203, "loss": 1.2152575254440308, "loss_ce": 0.0004138269869145006, "loss_iou": 0.390625, "loss_num": 0.08740234375, "loss_xval": 1.21875, "num_input_tokens_seen": 25445244, "step": 406 }, { "epoch": 1.3544093178036607, "grad_norm": 20.450408935546875, "learning_rate": 5e-06, "loss": 0.8806, "num_input_tokens_seen": 25507188, "step": 407 }, { "epoch": 1.3544093178036607, "loss": 0.6218196153640747, "loss_ce": 0.00023759223404340446, "loss_iou": 0.15234375, "loss_num": 0.0634765625, "loss_xval": 0.62109375, "num_input_tokens_seen": 25507188, "step": 407 }, { "epoch": 1.3577371048252913, "grad_norm": 16.568866729736328, "learning_rate": 5e-06, "loss": 0.9924, "num_input_tokens_seen": 25570620, "step": 408 }, { "epoch": 1.3577371048252913, "loss": 0.8238029479980469, "loss_ce": 0.022045187652111053, "loss_iou": 0.2314453125, "loss_num": 0.06787109375, "loss_xval": 0.80078125, "num_input_tokens_seen": 25570620, "step": 408 }, { "epoch": 1.361064891846922, "grad_norm": 17.278478622436523, "learning_rate": 5e-06, "loss": 1.0307, "num_input_tokens_seen": 25633892, "step": 409 }, { "epoch": 1.361064891846922, "loss": 1.0489603281021118, "loss_ce": 0.0023294282145798206, "loss_iou": 0.32421875, "loss_num": 0.0791015625, "loss_xval": 1.046875, "num_input_tokens_seen": 25633892, "step": 409 }, { "epoch": 1.3643926788685525, "grad_norm": 12.66992473602295, "learning_rate": 5e-06, "loss": 1.148, "num_input_tokens_seen": 25697340, "step": 410 }, { "epoch": 1.3643926788685525, "loss": 1.0088579654693604, "loss_ce": 0.003456423059105873, "loss_iou": 0.2890625, "loss_num": 0.08544921875, "loss_xval": 1.0078125, "num_input_tokens_seen": 25697340, "step": 410 }, { "epoch": 1.3677204658901831, "grad_norm": 6.634986400604248, "learning_rate": 5e-06, "loss": 0.8108, "num_input_tokens_seen": 25758432, "step": 411 }, { "epoch": 1.3677204658901831, "loss": 0.9283370971679688, "loss_ce": 0.00426485575735569, "loss_iou": 0.267578125, "loss_num": 0.07763671875, "loss_xval": 0.92578125, "num_input_tokens_seen": 25758432, "step": 411 }, { "epoch": 1.3710482529118138, "grad_norm": 10.549354553222656, "learning_rate": 5e-06, "loss": 1.0254, "num_input_tokens_seen": 25822248, "step": 412 }, { "epoch": 1.3710482529118138, "loss": 0.9352825284004211, "loss_ce": 0.005350937135517597, "loss_iou": 0.294921875, "loss_num": 0.068359375, "loss_xval": 0.9296875, "num_input_tokens_seen": 25822248, "step": 412 }, { "epoch": 1.3743760399334444, "grad_norm": 14.273979187011719, "learning_rate": 5e-06, "loss": 1.1039, "num_input_tokens_seen": 25884780, "step": 413 }, { "epoch": 1.3743760399334444, "loss": 1.2133036851882935, "loss_ce": 0.0009013470844365656, "loss_iou": 0.42578125, "loss_num": 0.072265625, "loss_xval": 1.2109375, "num_input_tokens_seen": 25884780, "step": 413 }, { "epoch": 1.377703826955075, "grad_norm": 14.600478172302246, "learning_rate": 5e-06, "loss": 0.9331, "num_input_tokens_seen": 25946912, "step": 414 }, { "epoch": 1.377703826955075, "loss": 0.6742814183235168, "loss_ce": 0.00045326852705329657, "loss_iou": 0.1923828125, "loss_num": 0.057861328125, "loss_xval": 0.671875, "num_input_tokens_seen": 25946912, "step": 414 }, { "epoch": 1.3810316139767056, "grad_norm": 11.724282264709473, "learning_rate": 5e-06, "loss": 1.0044, "num_input_tokens_seen": 26009668, "step": 415 }, { "epoch": 1.3810316139767056, "loss": 1.0532835721969604, "loss_ce": 6.0868831496918574e-05, "loss_iou": 0.333984375, "loss_num": 0.0771484375, "loss_xval": 1.0546875, "num_input_tokens_seen": 26009668, "step": 415 }, { "epoch": 1.3843594009983362, "grad_norm": 10.00571346282959, "learning_rate": 5e-06, "loss": 0.9009, "num_input_tokens_seen": 26071008, "step": 416 }, { "epoch": 1.3843594009983362, "loss": 1.1453657150268555, "loss_ce": 0.044779855757951736, "loss_iou": 0.3125, "loss_num": 0.09521484375, "loss_xval": 1.1015625, "num_input_tokens_seen": 26071008, "step": 416 }, { "epoch": 1.3876871880199668, "grad_norm": 15.58684253692627, "learning_rate": 5e-06, "loss": 1.1956, "num_input_tokens_seen": 26132352, "step": 417 }, { "epoch": 1.3876871880199668, "loss": 0.8832725882530212, "loss_ce": 0.00046005373587831855, "loss_iou": 0.2373046875, "loss_num": 0.08154296875, "loss_xval": 0.8828125, "num_input_tokens_seen": 26132352, "step": 417 }, { "epoch": 1.3910149750415974, "grad_norm": 19.847135543823242, "learning_rate": 5e-06, "loss": 0.9647, "num_input_tokens_seen": 26195748, "step": 418 }, { "epoch": 1.3910149750415974, "loss": 0.9432509541511536, "loss_ce": 0.0030654240399599075, "loss_iou": 0.35546875, "loss_num": 0.046142578125, "loss_xval": 0.94140625, "num_input_tokens_seen": 26195748, "step": 418 }, { "epoch": 1.394342762063228, "grad_norm": 34.97438049316406, "learning_rate": 5e-06, "loss": 1.1494, "num_input_tokens_seen": 26257164, "step": 419 }, { "epoch": 1.394342762063228, "loss": 1.2391083240509033, "loss_ce": 9.462250454816967e-05, "loss_iou": 0.41015625, "loss_num": 0.08349609375, "loss_xval": 1.2421875, "num_input_tokens_seen": 26257164, "step": 419 }, { "epoch": 1.3976705490848587, "grad_norm": 14.589425086975098, "learning_rate": 5e-06, "loss": 1.1584, "num_input_tokens_seen": 26320716, "step": 420 }, { "epoch": 1.3976705490848587, "loss": 1.2718900442123413, "loss_ce": 0.004800152964890003, "loss_iou": 0.380859375, "loss_num": 0.10107421875, "loss_xval": 1.265625, "num_input_tokens_seen": 26320716, "step": 420 }, { "epoch": 1.4009983361064893, "grad_norm": 13.36715316772461, "learning_rate": 5e-06, "loss": 1.1376, "num_input_tokens_seen": 26384520, "step": 421 }, { "epoch": 1.4009983361064893, "loss": 0.8977671265602112, "loss_ce": 0.0003061619936488569, "loss_iou": 0.31640625, "loss_num": 0.052734375, "loss_xval": 0.8984375, "num_input_tokens_seen": 26384520, "step": 421 }, { "epoch": 1.4043261231281199, "grad_norm": 23.56789779663086, "learning_rate": 5e-06, "loss": 1.0789, "num_input_tokens_seen": 26447724, "step": 422 }, { "epoch": 1.4043261231281199, "loss": 1.1214494705200195, "loss_ce": 0.003285370534285903, "loss_iou": 0.3828125, "loss_num": 0.07080078125, "loss_xval": 1.1171875, "num_input_tokens_seen": 26447724, "step": 422 }, { "epoch": 1.4076539101497505, "grad_norm": 17.013761520385742, "learning_rate": 5e-06, "loss": 1.0713, "num_input_tokens_seen": 26510680, "step": 423 }, { "epoch": 1.4076539101497505, "loss": 1.0345571041107178, "loss_ce": 0.00037738040555268526, "loss_iou": 0.2890625, "loss_num": 0.09130859375, "loss_xval": 1.03125, "num_input_tokens_seen": 26510680, "step": 423 }, { "epoch": 1.410981697171381, "grad_norm": 41.10530471801758, "learning_rate": 5e-06, "loss": 1.1384, "num_input_tokens_seen": 26574016, "step": 424 }, { "epoch": 1.410981697171381, "loss": 1.054764747619629, "loss_ce": 7.73034553276375e-05, "loss_iou": 0.3359375, "loss_num": 0.07666015625, "loss_xval": 1.0546875, "num_input_tokens_seen": 26574016, "step": 424 }, { "epoch": 1.4143094841930117, "grad_norm": 81.72950744628906, "learning_rate": 5e-06, "loss": 1.1902, "num_input_tokens_seen": 26637704, "step": 425 }, { "epoch": 1.4143094841930117, "loss": 0.9354544878005981, "loss_ce": 0.0003958155866712332, "loss_iou": 0.341796875, "loss_num": 0.050537109375, "loss_xval": 0.93359375, "num_input_tokens_seen": 26637704, "step": 425 }, { "epoch": 1.4176372712146423, "grad_norm": 29.790283203125, "learning_rate": 5e-06, "loss": 1.1938, "num_input_tokens_seen": 26701468, "step": 426 }, { "epoch": 1.4176372712146423, "loss": 0.8883048892021179, "loss_ce": 0.002562721725553274, "loss_iou": 0.228515625, "loss_num": 0.08544921875, "loss_xval": 0.88671875, "num_input_tokens_seen": 26701468, "step": 426 }, { "epoch": 1.420965058236273, "grad_norm": 16.44548797607422, "learning_rate": 5e-06, "loss": 1.1486, "num_input_tokens_seen": 26764044, "step": 427 }, { "epoch": 1.420965058236273, "loss": 1.1505101919174194, "loss_ce": 0.0006078255828469992, "loss_iou": 0.337890625, "loss_num": 0.0947265625, "loss_xval": 1.1484375, "num_input_tokens_seen": 26764044, "step": 427 }, { "epoch": 1.4242928452579036, "grad_norm": 194.185791015625, "learning_rate": 5e-06, "loss": 0.9832, "num_input_tokens_seen": 26826320, "step": 428 }, { "epoch": 1.4242928452579036, "loss": 0.9561614990234375, "loss_ce": 0.004989705514162779, "loss_iou": 0.212890625, "loss_num": 0.10546875, "loss_xval": 0.953125, "num_input_tokens_seen": 26826320, "step": 428 }, { "epoch": 1.4276206322795342, "grad_norm": 27.536348342895508, "learning_rate": 5e-06, "loss": 1.0146, "num_input_tokens_seen": 26889240, "step": 429 }, { "epoch": 1.4276206322795342, "loss": 1.2331770658493042, "loss_ce": 0.004661452490836382, "loss_iou": 0.40625, "loss_num": 0.0830078125, "loss_xval": 1.2265625, "num_input_tokens_seen": 26889240, "step": 429 }, { "epoch": 1.4309484193011648, "grad_norm": 23.240528106689453, "learning_rate": 5e-06, "loss": 1.2288, "num_input_tokens_seen": 26952020, "step": 430 }, { "epoch": 1.4309484193011648, "loss": 1.34483003616333, "loss_ce": 0.00010352435492677614, "loss_iou": 0.423828125, "loss_num": 0.09912109375, "loss_xval": 1.34375, "num_input_tokens_seen": 26952020, "step": 430 }, { "epoch": 1.4342762063227954, "grad_norm": 14.340412139892578, "learning_rate": 5e-06, "loss": 1.1576, "num_input_tokens_seen": 27015052, "step": 431 }, { "epoch": 1.4342762063227954, "loss": 1.1539814472198486, "loss_ce": 0.0004169994790572673, "loss_iou": 0.375, "loss_num": 0.0810546875, "loss_xval": 1.15625, "num_input_tokens_seen": 27015052, "step": 431 }, { "epoch": 1.437603993344426, "grad_norm": 13.505350112915039, "learning_rate": 5e-06, "loss": 0.9107, "num_input_tokens_seen": 27077640, "step": 432 }, { "epoch": 1.437603993344426, "loss": 0.8342275023460388, "loss_ce": 0.0010976643534377217, "loss_iou": 0.265625, "loss_num": 0.060546875, "loss_xval": 0.83203125, "num_input_tokens_seen": 27077640, "step": 432 }, { "epoch": 1.4409317803660566, "grad_norm": 15.690736770629883, "learning_rate": 5e-06, "loss": 0.8302, "num_input_tokens_seen": 27140080, "step": 433 }, { "epoch": 1.4409317803660566, "loss": 0.9211246967315674, "loss_ce": 0.0016911044949665666, "loss_iou": 0.287109375, "loss_num": 0.06884765625, "loss_xval": 0.91796875, "num_input_tokens_seen": 27140080, "step": 433 }, { "epoch": 1.4442595673876872, "grad_norm": 13.289794921875, "learning_rate": 5e-06, "loss": 0.8306, "num_input_tokens_seen": 27203336, "step": 434 }, { "epoch": 1.4442595673876872, "loss": 0.7346397042274475, "loss_ce": 0.00050883594667539, "loss_iou": 0.267578125, "loss_num": 0.03955078125, "loss_xval": 0.734375, "num_input_tokens_seen": 27203336, "step": 434 }, { "epoch": 1.4475873544093179, "grad_norm": 11.401854515075684, "learning_rate": 5e-06, "loss": 0.9116, "num_input_tokens_seen": 27265880, "step": 435 }, { "epoch": 1.4475873544093179, "loss": 0.9152499437332153, "loss_ce": 0.0006992316339164972, "loss_iou": 0.30859375, "loss_num": 0.05908203125, "loss_xval": 0.9140625, "num_input_tokens_seen": 27265880, "step": 435 }, { "epoch": 1.4509151414309485, "grad_norm": 10.400731086730957, "learning_rate": 5e-06, "loss": 1.1403, "num_input_tokens_seen": 27329740, "step": 436 }, { "epoch": 1.4509151414309485, "loss": 1.0320229530334473, "loss_ce": 0.0005287755047902465, "loss_iou": 0.345703125, "loss_num": 0.068359375, "loss_xval": 1.03125, "num_input_tokens_seen": 27329740, "step": 436 }, { "epoch": 1.454242928452579, "grad_norm": 18.541240692138672, "learning_rate": 5e-06, "loss": 0.7472, "num_input_tokens_seen": 27392388, "step": 437 }, { "epoch": 1.454242928452579, "loss": 0.6203223466873169, "loss_ce": 0.0013037655735388398, "loss_iou": 0.130859375, "loss_num": 0.0712890625, "loss_xval": 0.6171875, "num_input_tokens_seen": 27392388, "step": 437 }, { "epoch": 1.4575707154742097, "grad_norm": 22.43004035949707, "learning_rate": 5e-06, "loss": 0.9605, "num_input_tokens_seen": 27453448, "step": 438 }, { "epoch": 1.4575707154742097, "loss": 0.6307517290115356, "loss_ce": 0.00013653574569616467, "loss_iou": 0.103515625, "loss_num": 0.08447265625, "loss_xval": 0.62890625, "num_input_tokens_seen": 27453448, "step": 438 }, { "epoch": 1.4608985024958403, "grad_norm": 13.829190254211426, "learning_rate": 5e-06, "loss": 0.9866, "num_input_tokens_seen": 27515228, "step": 439 }, { "epoch": 1.4608985024958403, "loss": 0.9368977546691895, "loss_ce": 0.004280570894479752, "loss_iou": 0.2890625, "loss_num": 0.07080078125, "loss_xval": 0.93359375, "num_input_tokens_seen": 27515228, "step": 439 }, { "epoch": 1.464226289517471, "grad_norm": 14.281774520874023, "learning_rate": 5e-06, "loss": 0.8164, "num_input_tokens_seen": 27577956, "step": 440 }, { "epoch": 1.464226289517471, "loss": 0.7943712472915649, "loss_ce": 0.002379045821726322, "loss_iou": 0.232421875, "loss_num": 0.06591796875, "loss_xval": 0.79296875, "num_input_tokens_seen": 27577956, "step": 440 }, { "epoch": 1.4675540765391015, "grad_norm": 11.133589744567871, "learning_rate": 5e-06, "loss": 1.0627, "num_input_tokens_seen": 27641140, "step": 441 }, { "epoch": 1.4675540765391015, "loss": 1.2072147130966187, "loss_ce": 0.0011600162833929062, "loss_iou": 0.357421875, "loss_num": 0.09814453125, "loss_xval": 1.203125, "num_input_tokens_seen": 27641140, "step": 441 }, { "epoch": 1.4708818635607321, "grad_norm": 15.000788688659668, "learning_rate": 5e-06, "loss": 0.919, "num_input_tokens_seen": 27702864, "step": 442 }, { "epoch": 1.4708818635607321, "loss": 1.0833325386047363, "loss_ce": 0.0015454718377441168, "loss_iou": 0.353515625, "loss_num": 0.0751953125, "loss_xval": 1.078125, "num_input_tokens_seen": 27702864, "step": 442 }, { "epoch": 1.4742096505823628, "grad_norm": 10.785703659057617, "learning_rate": 5e-06, "loss": 0.9502, "num_input_tokens_seen": 27766516, "step": 443 }, { "epoch": 1.4742096505823628, "loss": 0.9589688777923584, "loss_ce": 0.0007169640157371759, "loss_iou": 0.31640625, "loss_num": 0.0654296875, "loss_xval": 0.95703125, "num_input_tokens_seen": 27766516, "step": 443 }, { "epoch": 1.4775374376039934, "grad_norm": 14.798559188842773, "learning_rate": 5e-06, "loss": 1.033, "num_input_tokens_seen": 27829568, "step": 444 }, { "epoch": 1.4775374376039934, "loss": 1.023010015487671, "loss_ce": 0.00030495758983306587, "loss_iou": 0.3515625, "loss_num": 0.0634765625, "loss_xval": 1.0234375, "num_input_tokens_seen": 27829568, "step": 444 }, { "epoch": 1.480865224625624, "grad_norm": 11.827593803405762, "learning_rate": 5e-06, "loss": 0.6733, "num_input_tokens_seen": 27892536, "step": 445 }, { "epoch": 1.480865224625624, "loss": 0.5998960733413696, "loss_ce": 0.0006834049127064645, "loss_iou": 0.19140625, "loss_num": 0.043212890625, "loss_xval": 0.59765625, "num_input_tokens_seen": 27892536, "step": 445 }, { "epoch": 1.4841930116472546, "grad_norm": 12.027179718017578, "learning_rate": 5e-06, "loss": 0.9374, "num_input_tokens_seen": 27954480, "step": 446 }, { "epoch": 1.4841930116472546, "loss": 0.8107286095619202, "loss_ce": 0.005674911662936211, "loss_iou": 0.251953125, "loss_num": 0.060302734375, "loss_xval": 0.8046875, "num_input_tokens_seen": 27954480, "step": 446 }, { "epoch": 1.4875207986688852, "grad_norm": 12.46907901763916, "learning_rate": 5e-06, "loss": 0.9105, "num_input_tokens_seen": 28015384, "step": 447 }, { "epoch": 1.4875207986688852, "loss": 1.007506012916565, "loss_ce": 0.00018174726574216038, "loss_iou": 0.24609375, "loss_num": 0.10302734375, "loss_xval": 1.0078125, "num_input_tokens_seen": 28015384, "step": 447 }, { "epoch": 1.4908485856905158, "grad_norm": 19.959171295166016, "learning_rate": 5e-06, "loss": 0.984, "num_input_tokens_seen": 28077524, "step": 448 }, { "epoch": 1.4908485856905158, "loss": 1.0535316467285156, "loss_ce": 0.0010414018761366606, "loss_iou": 0.3046875, "loss_num": 0.08837890625, "loss_xval": 1.0546875, "num_input_tokens_seen": 28077524, "step": 448 }, { "epoch": 1.4941763727121464, "grad_norm": 20.844703674316406, "learning_rate": 5e-06, "loss": 0.9459, "num_input_tokens_seen": 28139404, "step": 449 }, { "epoch": 1.4941763727121464, "loss": 0.9492704272270203, "loss_ce": 0.0007841180195100605, "loss_iou": 0.21875, "loss_num": 0.10205078125, "loss_xval": 0.94921875, "num_input_tokens_seen": 28139404, "step": 449 }, { "epoch": 1.497504159733777, "grad_norm": 25.41834831237793, "learning_rate": 5e-06, "loss": 1.3058, "num_input_tokens_seen": 28201620, "step": 450 }, { "epoch": 1.497504159733777, "loss": 1.2704012393951416, "loss_ce": 0.0028230701573193073, "loss_iou": 0.427734375, "loss_num": 0.0830078125, "loss_xval": 1.265625, "num_input_tokens_seen": 28201620, "step": 450 }, { "epoch": 1.5008319467554077, "grad_norm": 35.88543701171875, "learning_rate": 5e-06, "loss": 1.1012, "num_input_tokens_seen": 28265268, "step": 451 }, { "epoch": 1.5008319467554077, "loss": 0.7575756311416626, "loss_ce": 0.001472145551815629, "loss_iou": 0.287109375, "loss_num": 0.035888671875, "loss_xval": 0.7578125, "num_input_tokens_seen": 28265268, "step": 451 }, { "epoch": 1.5041597337770383, "grad_norm": 9.400871276855469, "learning_rate": 5e-06, "loss": 0.7297, "num_input_tokens_seen": 28328344, "step": 452 }, { "epoch": 1.5041597337770383, "loss": 0.6579368114471436, "loss_ce": 0.0014427044661715627, "loss_iou": 0.216796875, "loss_num": 0.044677734375, "loss_xval": 0.65625, "num_input_tokens_seen": 28328344, "step": 452 }, { "epoch": 1.5074875207986689, "grad_norm": 16.661739349365234, "learning_rate": 5e-06, "loss": 1.0671, "num_input_tokens_seen": 28390980, "step": 453 }, { "epoch": 1.5074875207986689, "loss": 1.226120114326477, "loss_ce": 0.0010225145379081368, "loss_iou": 0.345703125, "loss_num": 0.1064453125, "loss_xval": 1.2265625, "num_input_tokens_seen": 28390980, "step": 453 }, { "epoch": 1.5108153078202995, "grad_norm": 16.2419490814209, "learning_rate": 5e-06, "loss": 0.9537, "num_input_tokens_seen": 28453244, "step": 454 }, { "epoch": 1.5108153078202995, "loss": 0.9552655816078186, "loss_ce": 0.0018964293412864208, "loss_iou": 0.1962890625, "loss_num": 0.1123046875, "loss_xval": 0.953125, "num_input_tokens_seen": 28453244, "step": 454 }, { "epoch": 1.51414309484193, "grad_norm": 22.404499053955078, "learning_rate": 5e-06, "loss": 0.8511, "num_input_tokens_seen": 28515280, "step": 455 }, { "epoch": 1.51414309484193, "loss": 0.9287418127059937, "loss_ce": 0.006378548685461283, "loss_iou": 0.2734375, "loss_num": 0.0751953125, "loss_xval": 0.921875, "num_input_tokens_seen": 28515280, "step": 455 }, { "epoch": 1.5174708818635607, "grad_norm": 23.939455032348633, "learning_rate": 5e-06, "loss": 1.3932, "num_input_tokens_seen": 28578200, "step": 456 }, { "epoch": 1.5174708818635607, "loss": 1.485808253288269, "loss_ce": 0.004362954758107662, "loss_iou": 0.4765625, "loss_num": 0.10498046875, "loss_xval": 1.484375, "num_input_tokens_seen": 28578200, "step": 456 }, { "epoch": 1.5207986688851913, "grad_norm": 19.551912307739258, "learning_rate": 5e-06, "loss": 0.9394, "num_input_tokens_seen": 28642336, "step": 457 }, { "epoch": 1.5207986688851913, "loss": 0.8611233234405518, "loss_ce": 3.935792119591497e-05, "loss_iou": 0.328125, "loss_num": 0.041015625, "loss_xval": 0.859375, "num_input_tokens_seen": 28642336, "step": 457 }, { "epoch": 1.524126455906822, "grad_norm": 33.463558197021484, "learning_rate": 5e-06, "loss": 1.3023, "num_input_tokens_seen": 28705660, "step": 458 }, { "epoch": 1.524126455906822, "loss": 1.4212629795074463, "loss_ce": 0.0020735724829137325, "loss_iou": 0.4296875, "loss_num": 0.1123046875, "loss_xval": 1.421875, "num_input_tokens_seen": 28705660, "step": 458 }, { "epoch": 1.5274542429284526, "grad_norm": 32.06713104248047, "learning_rate": 5e-06, "loss": 0.8497, "num_input_tokens_seen": 28768376, "step": 459 }, { "epoch": 1.5274542429284526, "loss": 0.9550895690917969, "loss_ce": 0.0004996892530471087, "loss_iou": 0.357421875, "loss_num": 0.04833984375, "loss_xval": 0.953125, "num_input_tokens_seen": 28768376, "step": 459 }, { "epoch": 1.5307820299500832, "grad_norm": 21.96576690673828, "learning_rate": 5e-06, "loss": 0.8911, "num_input_tokens_seen": 28831008, "step": 460 }, { "epoch": 1.5307820299500832, "loss": 0.796554684638977, "loss_ce": 0.0021211060229688883, "loss_iou": 0.1943359375, "loss_num": 0.0810546875, "loss_xval": 0.79296875, "num_input_tokens_seen": 28831008, "step": 460 }, { "epoch": 1.5341098169717138, "grad_norm": 12.529973030090332, "learning_rate": 5e-06, "loss": 1.0927, "num_input_tokens_seen": 28894052, "step": 461 }, { "epoch": 1.5341098169717138, "loss": 0.7951915264129639, "loss_ce": 0.004420042969286442, "loss_iou": 0.1484375, "loss_num": 0.0986328125, "loss_xval": 0.7890625, "num_input_tokens_seen": 28894052, "step": 461 }, { "epoch": 1.5374376039933444, "grad_norm": 21.059383392333984, "learning_rate": 5e-06, "loss": 0.8035, "num_input_tokens_seen": 28956284, "step": 462 }, { "epoch": 1.5374376039933444, "loss": 0.6268208026885986, "loss_ce": 0.001698711421340704, "loss_iou": 0.1845703125, "loss_num": 0.05126953125, "loss_xval": 0.625, "num_input_tokens_seen": 28956284, "step": 462 }, { "epoch": 1.540765391014975, "grad_norm": 17.505203247070312, "learning_rate": 5e-06, "loss": 1.2985, "num_input_tokens_seen": 29020736, "step": 463 }, { "epoch": 1.540765391014975, "loss": 1.3152070045471191, "loss_ce": 0.0017303972272202373, "loss_iou": 0.42578125, "loss_num": 0.09228515625, "loss_xval": 1.3125, "num_input_tokens_seen": 29020736, "step": 463 }, { "epoch": 1.5440931780366056, "grad_norm": 13.60715389251709, "learning_rate": 5e-06, "loss": 0.8746, "num_input_tokens_seen": 29082664, "step": 464 }, { "epoch": 1.5440931780366056, "loss": 1.051095962524414, "loss_ce": 0.002511953003704548, "loss_iou": 0.345703125, "loss_num": 0.0712890625, "loss_xval": 1.046875, "num_input_tokens_seen": 29082664, "step": 464 }, { "epoch": 1.5474209650582362, "grad_norm": 22.727663040161133, "learning_rate": 5e-06, "loss": 0.8494, "num_input_tokens_seen": 29146680, "step": 465 }, { "epoch": 1.5474209650582362, "loss": 1.0247268676757812, "loss_ce": 0.0010452390415593982, "loss_iou": 0.33984375, "loss_num": 0.06884765625, "loss_xval": 1.0234375, "num_input_tokens_seen": 29146680, "step": 465 }, { "epoch": 1.5507487520798668, "grad_norm": 19.767471313476562, "learning_rate": 5e-06, "loss": 0.9335, "num_input_tokens_seen": 29210188, "step": 466 }, { "epoch": 1.5507487520798668, "loss": 1.0263903141021729, "loss_ce": 0.00368527346290648, "loss_iou": 0.34765625, "loss_num": 0.06494140625, "loss_xval": 1.0234375, "num_input_tokens_seen": 29210188, "step": 466 }, { "epoch": 1.5540765391014975, "grad_norm": 11.171165466308594, "learning_rate": 5e-06, "loss": 0.9697, "num_input_tokens_seen": 29272836, "step": 467 }, { "epoch": 1.5540765391014975, "loss": 0.898423969745636, "loss_ce": 0.006089954171329737, "loss_iou": 0.25390625, "loss_num": 0.07763671875, "loss_xval": 0.890625, "num_input_tokens_seen": 29272836, "step": 467 }, { "epoch": 1.557404326123128, "grad_norm": 12.950803756713867, "learning_rate": 5e-06, "loss": 0.8517, "num_input_tokens_seen": 29334744, "step": 468 }, { "epoch": 1.557404326123128, "loss": 1.0840175151824951, "loss_ce": 0.008089832961559296, "loss_iou": 0.33203125, "loss_num": 0.08203125, "loss_xval": 1.078125, "num_input_tokens_seen": 29334744, "step": 468 }, { "epoch": 1.5607321131447587, "grad_norm": 43.52952194213867, "learning_rate": 5e-06, "loss": 1.2985, "num_input_tokens_seen": 29398080, "step": 469 }, { "epoch": 1.5607321131447587, "loss": 1.0282366275787354, "loss_ce": 0.003090148326009512, "loss_iou": 0.28125, "loss_num": 0.0927734375, "loss_xval": 1.0234375, "num_input_tokens_seen": 29398080, "step": 469 }, { "epoch": 1.5640599001663893, "grad_norm": 49.79926681518555, "learning_rate": 5e-06, "loss": 0.9525, "num_input_tokens_seen": 29461576, "step": 470 }, { "epoch": 1.5640599001663893, "loss": 0.6735060214996338, "loss_ce": 0.0019971991423517466, "loss_iou": 0.23046875, "loss_num": 0.042236328125, "loss_xval": 0.671875, "num_input_tokens_seen": 29461576, "step": 470 }, { "epoch": 1.56738768718802, "grad_norm": 13.252382278442383, "learning_rate": 5e-06, "loss": 0.929, "num_input_tokens_seen": 29524476, "step": 471 }, { "epoch": 1.56738768718802, "loss": 0.9817473888397217, "loss_ce": 0.002011068630963564, "loss_iou": 0.33984375, "loss_num": 0.06005859375, "loss_xval": 0.98046875, "num_input_tokens_seen": 29524476, "step": 471 }, { "epoch": 1.5707154742096505, "grad_norm": 14.923233985900879, "learning_rate": 5e-06, "loss": 1.1227, "num_input_tokens_seen": 29587796, "step": 472 }, { "epoch": 1.5707154742096505, "loss": 1.3451569080352783, "loss_ce": 0.00043034314876422286, "loss_iou": 0.443359375, "loss_num": 0.0908203125, "loss_xval": 1.34375, "num_input_tokens_seen": 29587796, "step": 472 }, { "epoch": 1.5740432612312811, "grad_norm": 15.69069766998291, "learning_rate": 5e-06, "loss": 0.9072, "num_input_tokens_seen": 29650672, "step": 473 }, { "epoch": 1.5740432612312811, "loss": 0.7996507883071899, "loss_ce": 0.003264050930738449, "loss_iou": 0.2490234375, "loss_num": 0.0595703125, "loss_xval": 0.796875, "num_input_tokens_seen": 29650672, "step": 473 }, { "epoch": 1.5773710482529117, "grad_norm": 28.606983184814453, "learning_rate": 5e-06, "loss": 0.9883, "num_input_tokens_seen": 29714604, "step": 474 }, { "epoch": 1.5773710482529117, "loss": 0.6658027768135071, "loss_ce": 0.0002754491288214922, "loss_iou": 0.2099609375, "loss_num": 0.04931640625, "loss_xval": 0.6640625, "num_input_tokens_seen": 29714604, "step": 474 }, { "epoch": 1.5806988352745424, "grad_norm": 14.635608673095703, "learning_rate": 5e-06, "loss": 0.9022, "num_input_tokens_seen": 29777760, "step": 475 }, { "epoch": 1.5806988352745424, "loss": 0.7488625049591064, "loss_ce": 0.012534376233816147, "loss_iou": 0.2421875, "loss_num": 0.050537109375, "loss_xval": 0.734375, "num_input_tokens_seen": 29777760, "step": 475 }, { "epoch": 1.584026622296173, "grad_norm": 23.2712459564209, "learning_rate": 5e-06, "loss": 0.7148, "num_input_tokens_seen": 29838992, "step": 476 }, { "epoch": 1.584026622296173, "loss": 0.6552135944366455, "loss_ce": 0.00030633312417194247, "loss_iou": 0.216796875, "loss_num": 0.04443359375, "loss_xval": 0.65625, "num_input_tokens_seen": 29838992, "step": 476 }, { "epoch": 1.5873544093178036, "grad_norm": 15.995981216430664, "learning_rate": 5e-06, "loss": 0.807, "num_input_tokens_seen": 29902632, "step": 477 }, { "epoch": 1.5873544093178036, "loss": 0.7012355327606201, "loss_ce": 0.0023830283898860216, "loss_iou": 0.232421875, "loss_num": 0.046875, "loss_xval": 0.69921875, "num_input_tokens_seen": 29902632, "step": 477 }, { "epoch": 1.5906821963394342, "grad_norm": 17.334726333618164, "learning_rate": 5e-06, "loss": 0.8927, "num_input_tokens_seen": 29965592, "step": 478 }, { "epoch": 1.5906821963394342, "loss": 1.0914649963378906, "loss_ce": 0.0006445984472520649, "loss_iou": 0.345703125, "loss_num": 0.080078125, "loss_xval": 1.09375, "num_input_tokens_seen": 29965592, "step": 478 }, { "epoch": 1.5940099833610648, "grad_norm": 22.14893913269043, "learning_rate": 5e-06, "loss": 0.9355, "num_input_tokens_seen": 30029944, "step": 479 }, { "epoch": 1.5940099833610648, "loss": 0.8586336374282837, "loss_ce": 0.0011202013120055199, "loss_iou": 0.296875, "loss_num": 0.052734375, "loss_xval": 0.859375, "num_input_tokens_seen": 30029944, "step": 479 }, { "epoch": 1.5973377703826954, "grad_norm": 15.957133293151855, "learning_rate": 5e-06, "loss": 0.7887, "num_input_tokens_seen": 30091948, "step": 480 }, { "epoch": 1.5973377703826954, "loss": 0.7341344356536865, "loss_ce": 0.0012242539087310433, "loss_iou": 0.19140625, "loss_num": 0.0703125, "loss_xval": 0.734375, "num_input_tokens_seen": 30091948, "step": 480 }, { "epoch": 1.600665557404326, "grad_norm": 12.82165241241455, "learning_rate": 5e-06, "loss": 0.9523, "num_input_tokens_seen": 30153712, "step": 481 }, { "epoch": 1.600665557404326, "loss": 0.8892770409584045, "loss_ce": 0.0006051433738321066, "loss_iou": 0.328125, "loss_num": 0.046630859375, "loss_xval": 0.890625, "num_input_tokens_seen": 30153712, "step": 481 }, { "epoch": 1.6039933444259566, "grad_norm": 16.93235206604004, "learning_rate": 5e-06, "loss": 1.0522, "num_input_tokens_seen": 30216844, "step": 482 }, { "epoch": 1.6039933444259566, "loss": 1.1036779880523682, "loss_ce": 0.0011389621067792177, "loss_iou": 0.267578125, "loss_num": 0.11376953125, "loss_xval": 1.1015625, "num_input_tokens_seen": 30216844, "step": 482 }, { "epoch": 1.6073211314475873, "grad_norm": 12.133223533630371, "learning_rate": 5e-06, "loss": 0.8506, "num_input_tokens_seen": 30278368, "step": 483 }, { "epoch": 1.6073211314475873, "loss": 0.9850232601165771, "loss_ce": 0.0001599617680767551, "loss_iou": 0.30859375, "loss_num": 0.07373046875, "loss_xval": 0.984375, "num_input_tokens_seen": 30278368, "step": 483 }, { "epoch": 1.6106489184692179, "grad_norm": 44.32785415649414, "learning_rate": 5e-06, "loss": 0.9432, "num_input_tokens_seen": 30339888, "step": 484 }, { "epoch": 1.6106489184692179, "loss": 1.1375561952590942, "loss_ce": 0.00022712742793373764, "loss_iou": 0.306640625, "loss_num": 0.10498046875, "loss_xval": 1.140625, "num_input_tokens_seen": 30339888, "step": 484 }, { "epoch": 1.6139767054908485, "grad_norm": 10.50809383392334, "learning_rate": 5e-06, "loss": 0.9693, "num_input_tokens_seen": 30402144, "step": 485 }, { "epoch": 1.6139767054908485, "loss": 0.9972724914550781, "loss_ce": 0.000446375401224941, "loss_iou": 0.328125, "loss_num": 0.06787109375, "loss_xval": 0.99609375, "num_input_tokens_seen": 30402144, "step": 485 }, { "epoch": 1.617304492512479, "grad_norm": 15.496356010437012, "learning_rate": 5e-06, "loss": 1.2141, "num_input_tokens_seen": 30465452, "step": 486 }, { "epoch": 1.617304492512479, "loss": 1.3970156908035278, "loss_ce": 0.0010195414070039988, "loss_iou": 0.421875, "loss_num": 0.1103515625, "loss_xval": 1.3984375, "num_input_tokens_seen": 30465452, "step": 486 }, { "epoch": 1.6206322795341097, "grad_norm": 36.90744400024414, "learning_rate": 5e-06, "loss": 1.0141, "num_input_tokens_seen": 30529608, "step": 487 }, { "epoch": 1.6206322795341097, "loss": 1.0115238428115845, "loss_ce": 0.0037113595753908157, "loss_iou": 0.365234375, "loss_num": 0.05517578125, "loss_xval": 1.0078125, "num_input_tokens_seen": 30529608, "step": 487 }, { "epoch": 1.6239600665557403, "grad_norm": 20.59255027770996, "learning_rate": 5e-06, "loss": 1.0354, "num_input_tokens_seen": 30591308, "step": 488 }, { "epoch": 1.6239600665557403, "loss": 1.2096441984176636, "loss_ce": 0.0016364282928407192, "loss_iou": 0.375, "loss_num": 0.091796875, "loss_xval": 1.2109375, "num_input_tokens_seen": 30591308, "step": 488 }, { "epoch": 1.627287853577371, "grad_norm": 24.88413429260254, "learning_rate": 5e-06, "loss": 0.9571, "num_input_tokens_seen": 30654808, "step": 489 }, { "epoch": 1.627287853577371, "loss": 0.9968461394309998, "loss_ce": 0.003926191478967667, "loss_iou": 0.32421875, "loss_num": 0.06884765625, "loss_xval": 0.9921875, "num_input_tokens_seen": 30654808, "step": 489 }, { "epoch": 1.6306156405990015, "grad_norm": 24.321149826049805, "learning_rate": 5e-06, "loss": 1.1118, "num_input_tokens_seen": 30719144, "step": 490 }, { "epoch": 1.6306156405990015, "loss": 0.9674986600875854, "loss_ce": 0.00192247552331537, "loss_iou": 0.322265625, "loss_num": 0.064453125, "loss_xval": 0.96484375, "num_input_tokens_seen": 30719144, "step": 490 }, { "epoch": 1.6339434276206322, "grad_norm": 18.081993103027344, "learning_rate": 5e-06, "loss": 0.905, "num_input_tokens_seen": 30780976, "step": 491 }, { "epoch": 1.6339434276206322, "loss": 0.8123141527175903, "loss_ce": 0.0009127893717959523, "loss_iou": 0.2353515625, "loss_num": 0.06787109375, "loss_xval": 0.8125, "num_input_tokens_seen": 30780976, "step": 491 }, { "epoch": 1.6372712146422628, "grad_norm": 38.994476318359375, "learning_rate": 5e-06, "loss": 0.9762, "num_input_tokens_seen": 30843968, "step": 492 }, { "epoch": 1.6372712146422628, "loss": 0.8970677852630615, "loss_ce": 9.50983667280525e-05, "loss_iou": 0.26171875, "loss_num": 0.07470703125, "loss_xval": 0.8984375, "num_input_tokens_seen": 30843968, "step": 492 }, { "epoch": 1.6405990016638934, "grad_norm": 19.654754638671875, "learning_rate": 5e-06, "loss": 1.1088, "num_input_tokens_seen": 30904740, "step": 493 }, { "epoch": 1.6405990016638934, "loss": 1.1129732131958008, "loss_ce": 0.0016451734118163586, "loss_iou": 0.330078125, "loss_num": 0.09033203125, "loss_xval": 1.109375, "num_input_tokens_seen": 30904740, "step": 493 }, { "epoch": 1.643926788685524, "grad_norm": 18.252052307128906, "learning_rate": 5e-06, "loss": 0.7697, "num_input_tokens_seen": 30968000, "step": 494 }, { "epoch": 1.643926788685524, "loss": 0.8988114595413208, "loss_ce": 0.0008622044115327299, "loss_iou": 0.34765625, "loss_num": 0.040771484375, "loss_xval": 0.8984375, "num_input_tokens_seen": 30968000, "step": 494 }, { "epoch": 1.6472545757071546, "grad_norm": 11.877856254577637, "learning_rate": 5e-06, "loss": 0.7778, "num_input_tokens_seen": 31031844, "step": 495 }, { "epoch": 1.6472545757071546, "loss": 0.8768675327301025, "loss_ce": 0.0011351365828886628, "loss_iou": 0.2314453125, "loss_num": 0.08251953125, "loss_xval": 0.875, "num_input_tokens_seen": 31031844, "step": 495 }, { "epoch": 1.6505823627287852, "grad_norm": 16.35790252685547, "learning_rate": 5e-06, "loss": 1.1891, "num_input_tokens_seen": 31095556, "step": 496 }, { "epoch": 1.6505823627287852, "loss": 1.4308223724365234, "loss_ce": 0.0001583620032761246, "loss_iou": 0.4453125, "loss_num": 0.1083984375, "loss_xval": 1.4296875, "num_input_tokens_seen": 31095556, "step": 496 }, { "epoch": 1.6539101497504158, "grad_norm": 12.555450439453125, "learning_rate": 5e-06, "loss": 0.8734, "num_input_tokens_seen": 31158156, "step": 497 }, { "epoch": 1.6539101497504158, "loss": 0.7806690335273743, "loss_ce": 0.007719771936535835, "loss_iou": 0.259765625, "loss_num": 0.051025390625, "loss_xval": 0.7734375, "num_input_tokens_seen": 31158156, "step": 497 }, { "epoch": 1.6572379367720464, "grad_norm": 12.337371826171875, "learning_rate": 5e-06, "loss": 1.1544, "num_input_tokens_seen": 31221496, "step": 498 }, { "epoch": 1.6572379367720464, "loss": 1.3280014991760254, "loss_ce": 0.0008530584746040404, "loss_iou": 0.4375, "loss_num": 0.0908203125, "loss_xval": 1.328125, "num_input_tokens_seen": 31221496, "step": 498 }, { "epoch": 1.660565723793677, "grad_norm": 16.143665313720703, "learning_rate": 5e-06, "loss": 1.252, "num_input_tokens_seen": 31285016, "step": 499 }, { "epoch": 1.660565723793677, "loss": 1.2564626932144165, "loss_ce": 0.0003592088760342449, "loss_iou": 0.435546875, "loss_num": 0.07666015625, "loss_xval": 1.2578125, "num_input_tokens_seen": 31285016, "step": 499 }, { "epoch": 1.6638935108153077, "grad_norm": 10.787676811218262, "learning_rate": 5e-06, "loss": 0.975, "num_input_tokens_seen": 31348120, "step": 500 }, { "epoch": 1.6638935108153077, "eval_seeclick_CIoU": 0.13085231557488441, "eval_seeclick_GIoU": 0.1558702141046524, "eval_seeclick_IoU": 0.23901727050542831, "eval_seeclick_MAE_all": 0.18714486062526703, "eval_seeclick_MAE_h": 0.07685794495046139, "eval_seeclick_MAE_w": 0.13620607554912567, "eval_seeclick_MAE_x_boxes": 0.32132112979888916, "eval_seeclick_MAE_y_boxes": 0.09800946339964867, "eval_seeclick_NUM_probability": 0.9999272227287292, "eval_seeclick_inside_bbox": 0.3541666716337204, "eval_seeclick_loss": 2.6794745922088623, "eval_seeclick_loss_ce": 0.06953983008861542, "eval_seeclick_loss_iou": 0.837890625, "eval_seeclick_loss_num": 0.179595947265625, "eval_seeclick_loss_xval": 2.574462890625, "eval_seeclick_runtime": 65.7063, "eval_seeclick_samples_per_second": 0.715, "eval_seeclick_steps_per_second": 0.03, "num_input_tokens_seen": 31348120, "step": 500 }, { "epoch": 1.6638935108153077, "eval_icons_CIoU": 0.07393881864845753, "eval_icons_GIoU": 0.19662170112133026, "eval_icons_IoU": 0.21981175243854523, "eval_icons_MAE_all": 0.13982237502932549, "eval_icons_MAE_h": 0.07564222812652588, "eval_icons_MAE_w": 0.15035200864076614, "eval_icons_MAE_x_boxes": 0.12051251530647278, "eval_icons_MAE_y_boxes": 0.049114322289824486, "eval_icons_NUM_probability": 0.9999871253967285, "eval_icons_inside_bbox": 0.3420138955116272, "eval_icons_loss": 2.3178176879882812, "eval_icons_loss_ce": 5.301104465615936e-06, "eval_icons_loss_iou": 0.798095703125, "eval_icons_loss_num": 0.14259719848632812, "eval_icons_loss_xval": 2.310546875, "eval_icons_runtime": 65.5349, "eval_icons_samples_per_second": 0.763, "eval_icons_steps_per_second": 0.031, "num_input_tokens_seen": 31348120, "step": 500 }, { "epoch": 1.6638935108153077, "eval_screenspot_CIoU": 0.03867738569776217, "eval_screenspot_GIoU": 0.1068417305747668, "eval_screenspot_IoU": 0.18955722451210022, "eval_screenspot_MAE_all": 0.20505708952744803, "eval_screenspot_MAE_h": 0.07092766960461934, "eval_screenspot_MAE_w": 0.17401040097077689, "eval_screenspot_MAE_x_boxes": 0.2769670287768046, "eval_screenspot_MAE_y_boxes": 0.1273154765367508, "eval_screenspot_NUM_probability": 0.9997655948003134, "eval_screenspot_inside_bbox": 0.31791667143503827, "eval_screenspot_loss": 2.826397657394409, "eval_screenspot_loss_ce": 0.0007081345732634267, "eval_screenspot_loss_iou": 0.9080403645833334, "eval_screenspot_loss_num": 0.20873006184895834, "eval_screenspot_loss_xval": 2.8587239583333335, "eval_screenspot_runtime": 123.0498, "eval_screenspot_samples_per_second": 0.723, "eval_screenspot_steps_per_second": 0.024, "num_input_tokens_seen": 31348120, "step": 500 }, { "epoch": 1.6638935108153077, "eval_compot_CIoU": -0.07353978231549263, "eval_compot_GIoU": 0.04984145052731037, "eval_compot_IoU": 0.10792786628007889, "eval_compot_MAE_all": 0.2465325966477394, "eval_compot_MAE_h": 0.08729888498783112, "eval_compot_MAE_w": 0.27648504078388214, "eval_compot_MAE_x_boxes": 0.19644539058208466, "eval_compot_MAE_y_boxes": 0.16193149238824844, "eval_compot_NUM_probability": 0.9998366832733154, "eval_compot_inside_bbox": 0.16840277798473835, "eval_compot_loss": 3.0365827083587646, "eval_compot_loss_ce": 0.005284860031679273, "eval_compot_loss_iou": 0.947509765625, "eval_compot_loss_num": 0.235687255859375, "eval_compot_loss_xval": 3.072265625, "eval_compot_runtime": 67.1863, "eval_compot_samples_per_second": 0.744, "eval_compot_steps_per_second": 0.03, "num_input_tokens_seen": 31348120, "step": 500 }, { "epoch": 1.6638935108153077, "eval_custom_ui_MAE_all": 0.09588127583265305, "eval_custom_ui_MAE_x": 0.10331301391124725, "eval_custom_ui_MAE_y": 0.08844954147934914, "eval_custom_ui_NUM_probability": 0.9999941885471344, "eval_custom_ui_loss": 0.48714783787727356, "eval_custom_ui_loss_ce": 0.00020886939091724344, "eval_custom_ui_loss_num": 0.0944366455078125, "eval_custom_ui_loss_xval": 0.4720458984375, "eval_custom_ui_runtime": 51.6926, "eval_custom_ui_samples_per_second": 0.967, "eval_custom_ui_steps_per_second": 0.039, "num_input_tokens_seen": 31348120, "step": 500 }, { "epoch": 1.6638935108153077, "loss": 0.44073396921157837, "loss_ce": 0.0003042669268324971, "loss_iou": 0.0, "loss_num": 0.087890625, "loss_xval": 0.44140625, "num_input_tokens_seen": 31348120, "step": 500 }, { "epoch": 1.6672212978369383, "grad_norm": 19.1532039642334, "learning_rate": 5e-06, "loss": 1.0931, "num_input_tokens_seen": 31411648, "step": 501 }, { "epoch": 1.6672212978369383, "loss": 1.0893843173980713, "loss_ce": 0.0014936134684830904, "loss_iou": 0.345703125, "loss_num": 0.0791015625, "loss_xval": 1.0859375, "num_input_tokens_seen": 31411648, "step": 501 }, { "epoch": 1.670549084858569, "grad_norm": 41.87156295776367, "learning_rate": 5e-06, "loss": 1.1134, "num_input_tokens_seen": 31474148, "step": 502 }, { "epoch": 1.670549084858569, "loss": 0.7424819469451904, "loss_ce": 0.0009047660860233009, "loss_iou": 0.21875, "loss_num": 0.06103515625, "loss_xval": 0.7421875, "num_input_tokens_seen": 31474148, "step": 502 }, { "epoch": 1.6738768718801995, "grad_norm": 26.744535446166992, "learning_rate": 5e-06, "loss": 0.9912, "num_input_tokens_seen": 31538180, "step": 503 }, { "epoch": 1.6738768718801995, "loss": 1.1642227172851562, "loss_ce": 0.0023575148079544306, "loss_iou": 0.396484375, "loss_num": 0.07421875, "loss_xval": 1.1640625, "num_input_tokens_seen": 31538180, "step": 503 }, { "epoch": 1.6772046589018301, "grad_norm": 10.769326210021973, "learning_rate": 5e-06, "loss": 1.1929, "num_input_tokens_seen": 31601972, "step": 504 }, { "epoch": 1.6772046589018301, "loss": 1.1387523412704468, "loss_ce": 0.0005688150995410979, "loss_iou": 0.3671875, "loss_num": 0.08056640625, "loss_xval": 1.140625, "num_input_tokens_seen": 31601972, "step": 504 }, { "epoch": 1.6805324459234607, "grad_norm": 24.930686950683594, "learning_rate": 5e-06, "loss": 1.0095, "num_input_tokens_seen": 31665256, "step": 505 }, { "epoch": 1.6805324459234607, "loss": 1.1174432039260864, "loss_ce": 0.000499797286465764, "loss_iou": 0.37109375, "loss_num": 0.07470703125, "loss_xval": 1.1171875, "num_input_tokens_seen": 31665256, "step": 505 }, { "epoch": 1.6838602329450914, "grad_norm": 11.738724708557129, "learning_rate": 5e-06, "loss": 0.7514, "num_input_tokens_seen": 31726716, "step": 506 }, { "epoch": 1.6838602329450914, "loss": 0.5557652711868286, "loss_ce": 0.0007115780026651919, "loss_iou": 0.173828125, "loss_num": 0.04150390625, "loss_xval": 0.5546875, "num_input_tokens_seen": 31726716, "step": 506 }, { "epoch": 1.687188019966722, "grad_norm": 8.881688117980957, "learning_rate": 5e-06, "loss": 0.8519, "num_input_tokens_seen": 31789836, "step": 507 }, { "epoch": 1.687188019966722, "loss": 0.9876424670219421, "loss_ce": 0.00033773150062188506, "loss_iou": 0.25, "loss_num": 0.09716796875, "loss_xval": 0.98828125, "num_input_tokens_seen": 31789836, "step": 507 }, { "epoch": 1.6905158069883528, "grad_norm": 13.23851490020752, "learning_rate": 5e-06, "loss": 1.0906, "num_input_tokens_seen": 31853316, "step": 508 }, { "epoch": 1.6905158069883528, "loss": 1.2013055086135864, "loss_ce": 0.0006219673086889088, "loss_iou": 0.31640625, "loss_num": 0.11328125, "loss_xval": 1.203125, "num_input_tokens_seen": 31853316, "step": 508 }, { "epoch": 1.6938435940099834, "grad_norm": 32.30921173095703, "learning_rate": 5e-06, "loss": 1.0942, "num_input_tokens_seen": 31916364, "step": 509 }, { "epoch": 1.6938435940099834, "loss": 1.0613774061203003, "loss_ce": 0.0005864131962880492, "loss_iou": 0.3046875, "loss_num": 0.09033203125, "loss_xval": 1.0625, "num_input_tokens_seen": 31916364, "step": 509 }, { "epoch": 1.697171381031614, "grad_norm": 21.38906478881836, "learning_rate": 5e-06, "loss": 1.1213, "num_input_tokens_seen": 31979748, "step": 510 }, { "epoch": 1.697171381031614, "loss": 1.2083408832550049, "loss_ce": 0.04061625152826309, "loss_iou": 0.359375, "loss_num": 0.08984375, "loss_xval": 1.1640625, "num_input_tokens_seen": 31979748, "step": 510 }, { "epoch": 1.7004991680532446, "grad_norm": 27.368276596069336, "learning_rate": 5e-06, "loss": 1.2503, "num_input_tokens_seen": 32042528, "step": 511 }, { "epoch": 1.7004991680532446, "loss": 1.3059027194976807, "loss_ce": 0.0012151505798101425, "loss_iou": 0.5, "loss_num": 0.060791015625, "loss_xval": 1.3046875, "num_input_tokens_seen": 32042528, "step": 511 }, { "epoch": 1.7038269550748752, "grad_norm": 11.777514457702637, "learning_rate": 5e-06, "loss": 1.0625, "num_input_tokens_seen": 32105152, "step": 512 }, { "epoch": 1.7038269550748752, "loss": 0.9886508584022522, "loss_ce": 0.000857874343637377, "loss_iou": 0.357421875, "loss_num": 0.054443359375, "loss_xval": 0.98828125, "num_input_tokens_seen": 32105152, "step": 512 }, { "epoch": 1.7071547420965059, "grad_norm": 19.91946792602539, "learning_rate": 5e-06, "loss": 0.7893, "num_input_tokens_seen": 32167704, "step": 513 }, { "epoch": 1.7071547420965059, "loss": 0.8008160591125488, "loss_ce": 0.0010113224852830172, "loss_iou": 0.265625, "loss_num": 0.0537109375, "loss_xval": 0.80078125, "num_input_tokens_seen": 32167704, "step": 513 }, { "epoch": 1.7104825291181365, "grad_norm": 24.025108337402344, "learning_rate": 5e-06, "loss": 1.0882, "num_input_tokens_seen": 32231292, "step": 514 }, { "epoch": 1.7104825291181365, "loss": 1.2957994937896729, "loss_ce": 0.0008775498135946691, "loss_iou": 0.453125, "loss_num": 0.07763671875, "loss_xval": 1.296875, "num_input_tokens_seen": 32231292, "step": 514 }, { "epoch": 1.713810316139767, "grad_norm": 22.541730880737305, "learning_rate": 5e-06, "loss": 0.9815, "num_input_tokens_seen": 32294084, "step": 515 }, { "epoch": 1.713810316139767, "loss": 1.1235849857330322, "loss_ce": 0.0005380419315770268, "loss_iou": 0.37890625, "loss_num": 0.07275390625, "loss_xval": 1.125, "num_input_tokens_seen": 32294084, "step": 515 }, { "epoch": 1.7171381031613977, "grad_norm": 17.039142608642578, "learning_rate": 5e-06, "loss": 0.8633, "num_input_tokens_seen": 32355804, "step": 516 }, { "epoch": 1.7171381031613977, "loss": 0.8609045147895813, "loss_ce": 0.0012853904627263546, "loss_iou": 0.228515625, "loss_num": 0.080078125, "loss_xval": 0.859375, "num_input_tokens_seen": 32355804, "step": 516 }, { "epoch": 1.7204658901830283, "grad_norm": 17.313488006591797, "learning_rate": 5e-06, "loss": 1.0722, "num_input_tokens_seen": 32418416, "step": 517 }, { "epoch": 1.7204658901830283, "loss": 0.8409963846206665, "loss_ce": 0.0006643726374022663, "loss_iou": 0.283203125, "loss_num": 0.054931640625, "loss_xval": 0.83984375, "num_input_tokens_seen": 32418416, "step": 517 }, { "epoch": 1.723793677204659, "grad_norm": 21.147789001464844, "learning_rate": 5e-06, "loss": 0.9084, "num_input_tokens_seen": 32480452, "step": 518 }, { "epoch": 1.723793677204659, "loss": 0.6569137573242188, "loss_ce": 0.00041967950528487563, "loss_iou": 0.166015625, "loss_num": 0.06494140625, "loss_xval": 0.65625, "num_input_tokens_seen": 32480452, "step": 518 }, { "epoch": 1.7271214642262895, "grad_norm": 14.255158424377441, "learning_rate": 5e-06, "loss": 0.9042, "num_input_tokens_seen": 32543352, "step": 519 }, { "epoch": 1.7271214642262895, "loss": 0.9606277346611023, "loss_ce": 0.0011550780618563294, "loss_iou": 0.359375, "loss_num": 0.0478515625, "loss_xval": 0.9609375, "num_input_tokens_seen": 32543352, "step": 519 }, { "epoch": 1.7304492512479202, "grad_norm": 23.895132064819336, "learning_rate": 5e-06, "loss": 1.0736, "num_input_tokens_seen": 32606656, "step": 520 }, { "epoch": 1.7304492512479202, "loss": 0.9719319343566895, "loss_ce": 0.004036390222609043, "loss_iou": 0.306640625, "loss_num": 0.07080078125, "loss_xval": 0.96875, "num_input_tokens_seen": 32606656, "step": 520 }, { "epoch": 1.7337770382695508, "grad_norm": 28.39651870727539, "learning_rate": 5e-06, "loss": 1.2689, "num_input_tokens_seen": 32668332, "step": 521 }, { "epoch": 1.7337770382695508, "loss": 1.0842338800430298, "loss_ce": 0.001958527136594057, "loss_iou": 0.3203125, "loss_num": 0.0888671875, "loss_xval": 1.0859375, "num_input_tokens_seen": 32668332, "step": 521 }, { "epoch": 1.7371048252911814, "grad_norm": 14.18319320678711, "learning_rate": 5e-06, "loss": 1.1421, "num_input_tokens_seen": 32730852, "step": 522 }, { "epoch": 1.7371048252911814, "loss": 1.0944100618362427, "loss_ce": 4.9661710363579914e-05, "loss_iou": 0.333984375, "loss_num": 0.0859375, "loss_xval": 1.09375, "num_input_tokens_seen": 32730852, "step": 522 }, { "epoch": 1.740432612312812, "grad_norm": 20.83157730102539, "learning_rate": 5e-06, "loss": 1.2773, "num_input_tokens_seen": 32795496, "step": 523 }, { "epoch": 1.740432612312812, "loss": 1.1278488636016846, "loss_ce": 0.004802019335329533, "loss_iou": 0.421875, "loss_num": 0.055908203125, "loss_xval": 1.125, "num_input_tokens_seen": 32795496, "step": 523 }, { "epoch": 1.7437603993344426, "grad_norm": 16.72551918029785, "learning_rate": 5e-06, "loss": 0.9721, "num_input_tokens_seen": 32860204, "step": 524 }, { "epoch": 1.7437603993344426, "loss": 1.024550437927246, "loss_ce": 0.0004415763833094388, "loss_iou": 0.3671875, "loss_num": 0.05810546875, "loss_xval": 1.0234375, "num_input_tokens_seen": 32860204, "step": 524 }, { "epoch": 1.7470881863560732, "grad_norm": 11.438901901245117, "learning_rate": 5e-06, "loss": 0.8762, "num_input_tokens_seen": 32923856, "step": 525 }, { "epoch": 1.7470881863560732, "loss": 1.0130120515823364, "loss_ce": 0.001415381790138781, "loss_iou": 0.365234375, "loss_num": 0.0556640625, "loss_xval": 1.0078125, "num_input_tokens_seen": 32923856, "step": 525 }, { "epoch": 1.7504159733777038, "grad_norm": 8.21237850189209, "learning_rate": 5e-06, "loss": 0.7951, "num_input_tokens_seen": 32985220, "step": 526 }, { "epoch": 1.7504159733777038, "loss": 0.9805971384048462, "loss_ce": 0.00037258450174704194, "loss_iou": 0.31640625, "loss_num": 0.0693359375, "loss_xval": 0.98046875, "num_input_tokens_seen": 32985220, "step": 526 }, { "epoch": 1.7537437603993344, "grad_norm": 9.287155151367188, "learning_rate": 5e-06, "loss": 0.9109, "num_input_tokens_seen": 33048680, "step": 527 }, { "epoch": 1.7537437603993344, "loss": 0.9192065596580505, "loss_ce": 0.0012378038372844458, "loss_iou": 0.322265625, "loss_num": 0.0546875, "loss_xval": 0.91796875, "num_input_tokens_seen": 33048680, "step": 527 }, { "epoch": 1.757071547420965, "grad_norm": 46.055049896240234, "learning_rate": 5e-06, "loss": 1.0704, "num_input_tokens_seen": 33112220, "step": 528 }, { "epoch": 1.757071547420965, "loss": 1.060153841972351, "loss_ce": 0.0010718015255406499, "loss_iou": 0.34765625, "loss_num": 0.07275390625, "loss_xval": 1.0625, "num_input_tokens_seen": 33112220, "step": 528 }, { "epoch": 1.7603993344425957, "grad_norm": 27.12957000732422, "learning_rate": 5e-06, "loss": 0.865, "num_input_tokens_seen": 33172044, "step": 529 }, { "epoch": 1.7603993344425957, "loss": 0.9046726226806641, "loss_ce": 0.0001316153648076579, "loss_iou": 0.26171875, "loss_num": 0.076171875, "loss_xval": 0.90625, "num_input_tokens_seen": 33172044, "step": 529 }, { "epoch": 1.7637271214642263, "grad_norm": 14.900341987609863, "learning_rate": 5e-06, "loss": 0.811, "num_input_tokens_seen": 33234976, "step": 530 }, { "epoch": 1.7637271214642263, "loss": 0.8328965306282043, "loss_ce": 0.00037697955849580467, "loss_iou": 0.294921875, "loss_num": 0.04833984375, "loss_xval": 0.83203125, "num_input_tokens_seen": 33234976, "step": 530 }, { "epoch": 1.767054908485857, "grad_norm": 19.717370986938477, "learning_rate": 5e-06, "loss": 1.0421, "num_input_tokens_seen": 33298716, "step": 531 }, { "epoch": 1.767054908485857, "loss": 1.0288456678390503, "loss_ce": 0.0005254416028037667, "loss_iou": 0.35546875, "loss_num": 0.0634765625, "loss_xval": 1.03125, "num_input_tokens_seen": 33298716, "step": 531 }, { "epoch": 1.7703826955074875, "grad_norm": 14.97089672088623, "learning_rate": 5e-06, "loss": 0.7139, "num_input_tokens_seen": 33359296, "step": 532 }, { "epoch": 1.7703826955074875, "loss": 0.6194471716880798, "loss_ce": 0.00262587983161211, "loss_iou": 0.220703125, "loss_num": 0.03515625, "loss_xval": 0.6171875, "num_input_tokens_seen": 33359296, "step": 532 }, { "epoch": 1.7737104825291181, "grad_norm": 10.585949897766113, "learning_rate": 5e-06, "loss": 0.893, "num_input_tokens_seen": 33420788, "step": 533 }, { "epoch": 1.7737104825291181, "loss": 0.8950399160385132, "loss_ce": 0.0002645118802320212, "loss_iou": 0.3125, "loss_num": 0.053955078125, "loss_xval": 0.89453125, "num_input_tokens_seen": 33420788, "step": 533 }, { "epoch": 1.7770382695507487, "grad_norm": 10.35478687286377, "learning_rate": 5e-06, "loss": 1.0367, "num_input_tokens_seen": 33484684, "step": 534 }, { "epoch": 1.7770382695507487, "loss": 1.3992263078689575, "loss_ce": 5.641989628202282e-05, "loss_iou": 0.427734375, "loss_num": 0.1083984375, "loss_xval": 1.3984375, "num_input_tokens_seen": 33484684, "step": 534 }, { "epoch": 1.7803660565723793, "grad_norm": 13.746817588806152, "learning_rate": 5e-06, "loss": 0.9671, "num_input_tokens_seen": 33547376, "step": 535 }, { "epoch": 1.7803660565723793, "loss": 1.0142176151275635, "loss_ce": 5.745934322476387e-05, "loss_iou": 0.349609375, "loss_num": 0.06298828125, "loss_xval": 1.015625, "num_input_tokens_seen": 33547376, "step": 535 }, { "epoch": 1.78369384359401, "grad_norm": 16.677959442138672, "learning_rate": 5e-06, "loss": 1.0859, "num_input_tokens_seen": 33610036, "step": 536 }, { "epoch": 1.78369384359401, "loss": 1.0913352966308594, "loss_ce": 0.00027076914557255805, "loss_iou": 0.328125, "loss_num": 0.08642578125, "loss_xval": 1.09375, "num_input_tokens_seen": 33610036, "step": 536 }, { "epoch": 1.7870216306156406, "grad_norm": 10.253283500671387, "learning_rate": 5e-06, "loss": 0.7858, "num_input_tokens_seen": 33672984, "step": 537 }, { "epoch": 1.7870216306156406, "loss": 0.7678014636039734, "loss_ce": 0.0027867835015058517, "loss_iou": 0.26171875, "loss_num": 0.04833984375, "loss_xval": 0.765625, "num_input_tokens_seen": 33672984, "step": 537 }, { "epoch": 1.7903494176372712, "grad_norm": 35.10622024536133, "learning_rate": 5e-06, "loss": 1.0201, "num_input_tokens_seen": 33736132, "step": 538 }, { "epoch": 1.7903494176372712, "loss": 1.0910940170288086, "loss_ce": 0.0012502449098974466, "loss_iou": 0.3125, "loss_num": 0.0927734375, "loss_xval": 1.09375, "num_input_tokens_seen": 33736132, "step": 538 }, { "epoch": 1.7936772046589018, "grad_norm": 12.398226737976074, "learning_rate": 5e-06, "loss": 0.8778, "num_input_tokens_seen": 33799380, "step": 539 }, { "epoch": 1.7936772046589018, "loss": 0.7171542644500732, "loss_ce": 0.0008456680225208402, "loss_iou": 0.251953125, "loss_num": 0.042724609375, "loss_xval": 0.71484375, "num_input_tokens_seen": 33799380, "step": 539 }, { "epoch": 1.7970049916805324, "grad_norm": 14.9580659866333, "learning_rate": 5e-06, "loss": 0.7666, "num_input_tokens_seen": 33862656, "step": 540 }, { "epoch": 1.7970049916805324, "loss": 0.9555954933166504, "loss_ce": 0.0010056574828922749, "loss_iou": 0.302734375, "loss_num": 0.0693359375, "loss_xval": 0.953125, "num_input_tokens_seen": 33862656, "step": 540 }, { "epoch": 1.800332778702163, "grad_norm": 20.136131286621094, "learning_rate": 5e-06, "loss": 0.7634, "num_input_tokens_seen": 33925932, "step": 541 }, { "epoch": 1.800332778702163, "loss": 0.685050368309021, "loss_ce": 0.004142177756875753, "loss_iou": 0.240234375, "loss_num": 0.0400390625, "loss_xval": 0.6796875, "num_input_tokens_seen": 33925932, "step": 541 }, { "epoch": 1.8036605657237936, "grad_norm": 26.61849021911621, "learning_rate": 5e-06, "loss": 1.1369, "num_input_tokens_seen": 33989324, "step": 542 }, { "epoch": 1.8036605657237936, "loss": 1.1945197582244873, "loss_ce": 0.0018927238415926695, "loss_iou": 0.3359375, "loss_num": 0.10400390625, "loss_xval": 1.1953125, "num_input_tokens_seen": 33989324, "step": 542 }, { "epoch": 1.8069883527454242, "grad_norm": 12.676218032836914, "learning_rate": 5e-06, "loss": 1.0277, "num_input_tokens_seen": 34054000, "step": 543 }, { "epoch": 1.8069883527454242, "loss": 0.9486079216003418, "loss_ce": 0.0018306173151358962, "loss_iou": 0.34765625, "loss_num": 0.050048828125, "loss_xval": 0.9453125, "num_input_tokens_seen": 34054000, "step": 543 }, { "epoch": 1.8103161397670549, "grad_norm": 14.024618148803711, "learning_rate": 5e-06, "loss": 1.0651, "num_input_tokens_seen": 34118096, "step": 544 }, { "epoch": 1.8103161397670549, "loss": 1.2064943313598633, "loss_ce": 0.0009278804645873606, "loss_iou": 0.3828125, "loss_num": 0.08837890625, "loss_xval": 1.203125, "num_input_tokens_seen": 34118096, "step": 544 }, { "epoch": 1.8136439267886857, "grad_norm": 15.398441314697266, "learning_rate": 5e-06, "loss": 0.8867, "num_input_tokens_seen": 34181016, "step": 545 }, { "epoch": 1.8136439267886857, "loss": 0.6198215484619141, "loss_ce": 0.00019265212176833302, "loss_iou": 0.2021484375, "loss_num": 0.043212890625, "loss_xval": 0.62109375, "num_input_tokens_seen": 34181016, "step": 545 }, { "epoch": 1.8169717138103163, "grad_norm": 18.64105987548828, "learning_rate": 5e-06, "loss": 1.0866, "num_input_tokens_seen": 34242020, "step": 546 }, { "epoch": 1.8169717138103163, "loss": 0.9077228903770447, "loss_ce": 0.0011066340375691652, "loss_iou": 0.283203125, "loss_num": 0.0673828125, "loss_xval": 0.90625, "num_input_tokens_seen": 34242020, "step": 546 }, { "epoch": 1.820299500831947, "grad_norm": 13.642433166503906, "learning_rate": 5e-06, "loss": 0.9115, "num_input_tokens_seen": 34305132, "step": 547 }, { "epoch": 1.820299500831947, "loss": 0.6709655523300171, "loss_ce": 0.0017760555492714047, "loss_iou": 0.203125, "loss_num": 0.052734375, "loss_xval": 0.66796875, "num_input_tokens_seen": 34305132, "step": 547 }, { "epoch": 1.8236272878535775, "grad_norm": 37.51459503173828, "learning_rate": 5e-06, "loss": 1.0159, "num_input_tokens_seen": 34367900, "step": 548 }, { "epoch": 1.8236272878535775, "loss": 1.0078930854797363, "loss_ce": 0.0005688223754987121, "loss_iou": 0.37890625, "loss_num": 0.050048828125, "loss_xval": 1.0078125, "num_input_tokens_seen": 34367900, "step": 548 }, { "epoch": 1.8269550748752081, "grad_norm": 27.26789093017578, "learning_rate": 5e-06, "loss": 1.1389, "num_input_tokens_seen": 34429256, "step": 549 }, { "epoch": 1.8269550748752081, "loss": 1.143343448638916, "loss_ce": 0.001985959243029356, "loss_iou": 0.32421875, "loss_num": 0.09814453125, "loss_xval": 1.140625, "num_input_tokens_seen": 34429256, "step": 549 }, { "epoch": 1.8302828618968388, "grad_norm": 14.803655624389648, "learning_rate": 5e-06, "loss": 1.3364, "num_input_tokens_seen": 34492152, "step": 550 }, { "epoch": 1.8302828618968388, "loss": 1.4599002599716187, "loss_ce": 0.0004276781983207911, "loss_iou": 0.412109375, "loss_num": 0.126953125, "loss_xval": 1.4609375, "num_input_tokens_seen": 34492152, "step": 550 }, { "epoch": 1.8336106489184694, "grad_norm": 26.294727325439453, "learning_rate": 5e-06, "loss": 1.1966, "num_input_tokens_seen": 34555988, "step": 551 }, { "epoch": 1.8336106489184694, "loss": 1.2220561504364014, "loss_ce": 0.002573746722191572, "loss_iou": 0.404296875, "loss_num": 0.08203125, "loss_xval": 1.21875, "num_input_tokens_seen": 34555988, "step": 551 }, { "epoch": 1.8369384359401, "grad_norm": 14.449104309082031, "learning_rate": 5e-06, "loss": 0.9287, "num_input_tokens_seen": 34619232, "step": 552 }, { "epoch": 1.8369384359401, "loss": 0.9966768026351929, "loss_ce": 0.0013155062915757298, "loss_iou": 0.3671875, "loss_num": 0.052490234375, "loss_xval": 0.99609375, "num_input_tokens_seen": 34619232, "step": 552 }, { "epoch": 1.8402662229617306, "grad_norm": 23.511919021606445, "learning_rate": 5e-06, "loss": 1.0271, "num_input_tokens_seen": 34682704, "step": 553 }, { "epoch": 1.8402662229617306, "loss": 1.2029976844787598, "loss_ce": 0.0035348027013242245, "loss_iou": 0.4765625, "loss_num": 0.049072265625, "loss_xval": 1.203125, "num_input_tokens_seen": 34682704, "step": 553 }, { "epoch": 1.8435940099833612, "grad_norm": 12.143444061279297, "learning_rate": 5e-06, "loss": 0.8477, "num_input_tokens_seen": 34745964, "step": 554 }, { "epoch": 1.8435940099833612, "loss": 1.0100423097610474, "loss_ce": 0.00012407411122694612, "loss_iou": 0.37109375, "loss_num": 0.0537109375, "loss_xval": 1.0078125, "num_input_tokens_seen": 34745964, "step": 554 }, { "epoch": 1.8469217970049918, "grad_norm": 26.108985900878906, "learning_rate": 5e-06, "loss": 0.9329, "num_input_tokens_seen": 34808712, "step": 555 }, { "epoch": 1.8469217970049918, "loss": 0.9639697074890137, "loss_ce": 0.0013231942430138588, "loss_iou": 0.326171875, "loss_num": 0.06201171875, "loss_xval": 0.9609375, "num_input_tokens_seen": 34808712, "step": 555 }, { "epoch": 1.8502495840266224, "grad_norm": 18.90593147277832, "learning_rate": 5e-06, "loss": 1.0983, "num_input_tokens_seen": 34871224, "step": 556 }, { "epoch": 1.8502495840266224, "loss": 1.1608165502548218, "loss_ce": 0.0004161183023825288, "loss_iou": 0.4375, "loss_num": 0.056884765625, "loss_xval": 1.1640625, "num_input_tokens_seen": 34871224, "step": 556 }, { "epoch": 1.853577371048253, "grad_norm": 19.406909942626953, "learning_rate": 5e-06, "loss": 1.0324, "num_input_tokens_seen": 34931504, "step": 557 }, { "epoch": 1.853577371048253, "loss": 1.2264602184295654, "loss_ce": 0.0003859272401314229, "loss_iou": 0.421875, "loss_num": 0.07666015625, "loss_xval": 1.2265625, "num_input_tokens_seen": 34931504, "step": 557 }, { "epoch": 1.8569051580698837, "grad_norm": 12.974056243896484, "learning_rate": 5e-06, "loss": 0.7937, "num_input_tokens_seen": 34994100, "step": 558 }, { "epoch": 1.8569051580698837, "loss": 0.8865928649902344, "loss_ce": 0.0032920846715569496, "loss_iou": 0.283203125, "loss_num": 0.06298828125, "loss_xval": 0.8828125, "num_input_tokens_seen": 34994100, "step": 558 }, { "epoch": 1.8602329450915143, "grad_norm": 13.932268142700195, "learning_rate": 5e-06, "loss": 1.1015, "num_input_tokens_seen": 35056884, "step": 559 }, { "epoch": 1.8602329450915143, "loss": 1.1955981254577637, "loss_ce": 0.0007738770800642669, "loss_iou": 0.435546875, "loss_num": 0.064453125, "loss_xval": 1.1953125, "num_input_tokens_seen": 35056884, "step": 559 }, { "epoch": 1.8635607321131449, "grad_norm": 14.666391372680664, "learning_rate": 5e-06, "loss": 0.8643, "num_input_tokens_seen": 35120340, "step": 560 }, { "epoch": 1.8635607321131449, "loss": 0.886032223701477, "loss_ce": 0.0012666526017710567, "loss_iou": 0.283203125, "loss_num": 0.0634765625, "loss_xval": 0.8828125, "num_input_tokens_seen": 35120340, "step": 560 }, { "epoch": 1.8668885191347755, "grad_norm": 13.323609352111816, "learning_rate": 5e-06, "loss": 1.1881, "num_input_tokens_seen": 35183468, "step": 561 }, { "epoch": 1.8668885191347755, "loss": 1.152817964553833, "loss_ce": 0.0019389993976801634, "loss_iou": 0.36328125, "loss_num": 0.08544921875, "loss_xval": 1.1484375, "num_input_tokens_seen": 35183468, "step": 561 }, { "epoch": 1.870216306156406, "grad_norm": 9.693658828735352, "learning_rate": 5e-06, "loss": 1.1869, "num_input_tokens_seen": 35247208, "step": 562 }, { "epoch": 1.870216306156406, "loss": 1.3116389513015747, "loss_ce": 0.0006038228166289628, "loss_iou": 0.44921875, "loss_num": 0.08251953125, "loss_xval": 1.3125, "num_input_tokens_seen": 35247208, "step": 562 }, { "epoch": 1.8735440931780367, "grad_norm": 29.306072235107422, "learning_rate": 5e-06, "loss": 1.1472, "num_input_tokens_seen": 35310828, "step": 563 }, { "epoch": 1.8735440931780367, "loss": 1.2778418064117432, "loss_ce": 0.0068457284942269325, "loss_iou": 0.44140625, "loss_num": 0.078125, "loss_xval": 1.2734375, "num_input_tokens_seen": 35310828, "step": 563 }, { "epoch": 1.8768718801996673, "grad_norm": 27.0247745513916, "learning_rate": 5e-06, "loss": 0.9282, "num_input_tokens_seen": 35373164, "step": 564 }, { "epoch": 1.8768718801996673, "loss": 0.9917428493499756, "loss_ce": 0.00028781441505998373, "loss_iou": 0.345703125, "loss_num": 0.059814453125, "loss_xval": 0.9921875, "num_input_tokens_seen": 35373164, "step": 564 }, { "epoch": 1.880199667221298, "grad_norm": 14.91286563873291, "learning_rate": 5e-06, "loss": 0.9329, "num_input_tokens_seen": 35435044, "step": 565 }, { "epoch": 1.880199667221298, "loss": 1.015300989151001, "loss_ce": 0.00016425539797637612, "loss_iou": 0.310546875, "loss_num": 0.07958984375, "loss_xval": 1.015625, "num_input_tokens_seen": 35435044, "step": 565 }, { "epoch": 1.8835274542429286, "grad_norm": 10.378997802734375, "learning_rate": 5e-06, "loss": 0.9905, "num_input_tokens_seen": 35497864, "step": 566 }, { "epoch": 1.8835274542429286, "loss": 1.1247049570083618, "loss_ce": 0.0009256677003577352, "loss_iou": 0.349609375, "loss_num": 0.0849609375, "loss_xval": 1.125, "num_input_tokens_seen": 35497864, "step": 566 }, { "epoch": 1.8868552412645592, "grad_norm": 17.83684539794922, "learning_rate": 5e-06, "loss": 0.929, "num_input_tokens_seen": 35560536, "step": 567 }, { "epoch": 1.8868552412645592, "loss": 0.6144291162490845, "loss_ce": 0.00017130836204160005, "loss_iou": 0.1376953125, "loss_num": 0.06787109375, "loss_xval": 0.61328125, "num_input_tokens_seen": 35560536, "step": 567 }, { "epoch": 1.8901830282861898, "grad_norm": 49.60697937011719, "learning_rate": 5e-06, "loss": 0.9559, "num_input_tokens_seen": 35622356, "step": 568 }, { "epoch": 1.8901830282861898, "loss": 1.1087956428527832, "loss_ce": 0.0001530621957499534, "loss_iou": 0.2392578125, "loss_num": 0.1259765625, "loss_xval": 1.109375, "num_input_tokens_seen": 35622356, "step": 568 }, { "epoch": 1.8935108153078204, "grad_norm": 14.60826301574707, "learning_rate": 5e-06, "loss": 1.121, "num_input_tokens_seen": 35685972, "step": 569 }, { "epoch": 1.8935108153078204, "loss": 1.326939582824707, "loss_ce": 0.0022325818426907063, "loss_iou": 0.380859375, "loss_num": 0.11279296875, "loss_xval": 1.328125, "num_input_tokens_seen": 35685972, "step": 569 }, { "epoch": 1.896838602329451, "grad_norm": 25.41444969177246, "learning_rate": 5e-06, "loss": 0.8218, "num_input_tokens_seen": 35747280, "step": 570 }, { "epoch": 1.896838602329451, "loss": 0.8077418804168701, "loss_ce": 0.0006130048423074186, "loss_iou": 0.2216796875, "loss_num": 0.07275390625, "loss_xval": 0.80859375, "num_input_tokens_seen": 35747280, "step": 570 }, { "epoch": 1.9001663893510816, "grad_norm": 24.91752052307129, "learning_rate": 5e-06, "loss": 1.1637, "num_input_tokens_seen": 35811528, "step": 571 }, { "epoch": 1.9001663893510816, "loss": 1.3153760433197021, "loss_ce": 0.0033644186332821846, "loss_iou": 0.486328125, "loss_num": 0.06787109375, "loss_xval": 1.3125, "num_input_tokens_seen": 35811528, "step": 571 }, { "epoch": 1.9034941763727122, "grad_norm": 25.198698043823242, "learning_rate": 5e-06, "loss": 1.0152, "num_input_tokens_seen": 35874204, "step": 572 }, { "epoch": 1.9034941763727122, "loss": 1.3468836545944214, "loss_ce": 0.0002039041864918545, "loss_iou": 0.4453125, "loss_num": 0.09130859375, "loss_xval": 1.34375, "num_input_tokens_seen": 35874204, "step": 572 }, { "epoch": 1.9068219633943428, "grad_norm": 17.966543197631836, "learning_rate": 5e-06, "loss": 0.8496, "num_input_tokens_seen": 35936360, "step": 573 }, { "epoch": 1.9068219633943428, "loss": 1.1128754615783691, "loss_ce": 0.00032671194639988244, "loss_iou": 0.376953125, "loss_num": 0.0712890625, "loss_xval": 1.109375, "num_input_tokens_seen": 35936360, "step": 573 }, { "epoch": 1.9101497504159735, "grad_norm": 22.120344161987305, "learning_rate": 5e-06, "loss": 1.0125, "num_input_tokens_seen": 35998784, "step": 574 }, { "epoch": 1.9101497504159735, "loss": 1.294684648513794, "loss_ce": 0.0002511721686460078, "loss_iou": 0.416015625, "loss_num": 0.09228515625, "loss_xval": 1.296875, "num_input_tokens_seen": 35998784, "step": 574 }, { "epoch": 1.913477537437604, "grad_norm": 14.170149803161621, "learning_rate": 5e-06, "loss": 0.9052, "num_input_tokens_seen": 36061612, "step": 575 }, { "epoch": 1.913477537437604, "loss": 0.7781501412391663, "loss_ce": 0.0025154012255370617, "loss_iou": 0.2392578125, "loss_num": 0.05908203125, "loss_xval": 0.77734375, "num_input_tokens_seen": 36061612, "step": 575 }, { "epoch": 1.9168053244592347, "grad_norm": 38.99104309082031, "learning_rate": 5e-06, "loss": 1.1207, "num_input_tokens_seen": 36124752, "step": 576 }, { "epoch": 1.9168053244592347, "loss": 1.3191872835159302, "loss_ce": 0.0008279454777948558, "loss_iou": 0.443359375, "loss_num": 0.0859375, "loss_xval": 1.3203125, "num_input_tokens_seen": 36124752, "step": 576 }, { "epoch": 1.9201331114808653, "grad_norm": 18.79828453063965, "learning_rate": 5e-06, "loss": 1.0237, "num_input_tokens_seen": 36185564, "step": 577 }, { "epoch": 1.9201331114808653, "loss": 1.0448555946350098, "loss_ce": 0.0010322753805667162, "loss_iou": 0.310546875, "loss_num": 0.08447265625, "loss_xval": 1.046875, "num_input_tokens_seen": 36185564, "step": 577 }, { "epoch": 1.923460898502496, "grad_norm": 8.662841796875, "learning_rate": 5e-06, "loss": 0.8287, "num_input_tokens_seen": 36248000, "step": 578 }, { "epoch": 1.923460898502496, "loss": 0.7125368118286133, "loss_ce": 0.00013448702520690858, "loss_iou": 0.2470703125, "loss_num": 0.043212890625, "loss_xval": 0.7109375, "num_input_tokens_seen": 36248000, "step": 578 }, { "epoch": 1.9267886855241265, "grad_norm": 24.12902069091797, "learning_rate": 5e-06, "loss": 0.9049, "num_input_tokens_seen": 36310656, "step": 579 }, { "epoch": 1.9267886855241265, "loss": 0.8042501211166382, "loss_ce": 0.0005391854792833328, "loss_iou": 0.2138671875, "loss_num": 0.0751953125, "loss_xval": 0.8046875, "num_input_tokens_seen": 36310656, "step": 579 }, { "epoch": 1.9301164725457571, "grad_norm": 14.56750202178955, "learning_rate": 5e-06, "loss": 0.8137, "num_input_tokens_seen": 36373116, "step": 580 }, { "epoch": 1.9301164725457571, "loss": 0.6355822086334229, "loss_ce": 0.003990426659584045, "loss_iou": 0.19921875, "loss_num": 0.04638671875, "loss_xval": 0.6328125, "num_input_tokens_seen": 36373116, "step": 580 }, { "epoch": 1.9334442595673877, "grad_norm": 17.040950775146484, "learning_rate": 5e-06, "loss": 0.9652, "num_input_tokens_seen": 36434072, "step": 581 }, { "epoch": 1.9334442595673877, "loss": 1.1782926321029663, "loss_ce": 0.004586563445627689, "loss_iou": 0.33984375, "loss_num": 0.0986328125, "loss_xval": 1.171875, "num_input_tokens_seen": 36434072, "step": 581 }, { "epoch": 1.9367720465890184, "grad_norm": 10.302577018737793, "learning_rate": 5e-06, "loss": 0.7532, "num_input_tokens_seen": 36496096, "step": 582 }, { "epoch": 1.9367720465890184, "loss": 0.8137335777282715, "loss_ce": 0.0005012074834667146, "loss_iou": 0.291015625, "loss_num": 0.046142578125, "loss_xval": 0.8125, "num_input_tokens_seen": 36496096, "step": 582 }, { "epoch": 1.940099833610649, "grad_norm": 15.283492088317871, "learning_rate": 5e-06, "loss": 0.9993, "num_input_tokens_seen": 36558260, "step": 583 }, { "epoch": 1.940099833610649, "loss": 0.8162379860877991, "loss_ce": 0.00044212467037141323, "loss_iou": 0.26953125, "loss_num": 0.055419921875, "loss_xval": 0.81640625, "num_input_tokens_seen": 36558260, "step": 583 }, { "epoch": 1.9434276206322796, "grad_norm": 10.120014190673828, "learning_rate": 5e-06, "loss": 0.6606, "num_input_tokens_seen": 36620356, "step": 584 }, { "epoch": 1.9434276206322796, "loss": 0.6671352386474609, "loss_ce": 0.00014302069030236453, "loss_iou": 0.236328125, "loss_num": 0.039306640625, "loss_xval": 0.66796875, "num_input_tokens_seen": 36620356, "step": 584 }, { "epoch": 1.9467554076539102, "grad_norm": 10.893766403198242, "learning_rate": 5e-06, "loss": 0.9976, "num_input_tokens_seen": 36683052, "step": 585 }, { "epoch": 1.9467554076539102, "loss": 0.9768415689468384, "loss_ce": 0.0027204775251448154, "loss_iou": 0.30859375, "loss_num": 0.0712890625, "loss_xval": 0.97265625, "num_input_tokens_seen": 36683052, "step": 585 }, { "epoch": 1.9500831946755408, "grad_norm": 22.243104934692383, "learning_rate": 5e-06, "loss": 1.0094, "num_input_tokens_seen": 36745780, "step": 586 }, { "epoch": 1.9500831946755408, "loss": 0.975989043712616, "loss_ce": 0.0006472546374425292, "loss_iou": 0.314453125, "loss_num": 0.06884765625, "loss_xval": 0.9765625, "num_input_tokens_seen": 36745780, "step": 586 }, { "epoch": 1.9534109816971714, "grad_norm": 17.508132934570312, "learning_rate": 5e-06, "loss": 1.1263, "num_input_tokens_seen": 36809268, "step": 587 }, { "epoch": 1.9534109816971714, "loss": 0.9999065399169922, "loss_ce": 0.0018596657318994403, "loss_iou": 0.27734375, "loss_num": 0.08837890625, "loss_xval": 1.0, "num_input_tokens_seen": 36809268, "step": 587 }, { "epoch": 1.956738768718802, "grad_norm": 13.245006561279297, "learning_rate": 5e-06, "loss": 0.8024, "num_input_tokens_seen": 36873044, "step": 588 }, { "epoch": 1.956738768718802, "loss": 0.6673635244369507, "loss_ce": 0.00024929430219344795, "loss_iou": 0.2734375, "loss_num": 0.0240478515625, "loss_xval": 0.66796875, "num_input_tokens_seen": 36873044, "step": 588 }, { "epoch": 1.9600665557404326, "grad_norm": 9.61874008178711, "learning_rate": 5e-06, "loss": 1.037, "num_input_tokens_seen": 36935692, "step": 589 }, { "epoch": 1.9600665557404326, "loss": 0.9211355447769165, "loss_ce": 0.0014577455585822463, "loss_iou": 0.3203125, "loss_num": 0.05615234375, "loss_xval": 0.91796875, "num_input_tokens_seen": 36935692, "step": 589 }, { "epoch": 1.9633943427620633, "grad_norm": 10.554235458374023, "learning_rate": 5e-06, "loss": 0.9399, "num_input_tokens_seen": 36997520, "step": 590 }, { "epoch": 1.9633943427620633, "loss": 1.0528907775878906, "loss_ce": 0.0004004701040685177, "loss_iou": 0.3515625, "loss_num": 0.0703125, "loss_xval": 1.0546875, "num_input_tokens_seen": 36997520, "step": 590 }, { "epoch": 1.9667221297836939, "grad_norm": 17.416133880615234, "learning_rate": 5e-06, "loss": 0.7433, "num_input_tokens_seen": 37060460, "step": 591 }, { "epoch": 1.9667221297836939, "loss": 0.858521044254303, "loss_ce": 0.0008550334605388343, "loss_iou": 0.25390625, "loss_num": 0.0703125, "loss_xval": 0.859375, "num_input_tokens_seen": 37060460, "step": 591 }, { "epoch": 1.9700499168053245, "grad_norm": 14.890344619750977, "learning_rate": 5e-06, "loss": 0.9326, "num_input_tokens_seen": 37123204, "step": 592 }, { "epoch": 1.9700499168053245, "loss": 0.8313470482826233, "loss_ce": 0.013231809251010418, "loss_iou": 0.240234375, "loss_num": 0.06787109375, "loss_xval": 0.81640625, "num_input_tokens_seen": 37123204, "step": 592 }, { "epoch": 1.973377703826955, "grad_norm": 14.116703987121582, "learning_rate": 5e-06, "loss": 1.1915, "num_input_tokens_seen": 37185468, "step": 593 }, { "epoch": 1.973377703826955, "loss": 1.041740894317627, "loss_ce": 0.000511517224367708, "loss_iou": 0.38671875, "loss_num": 0.053466796875, "loss_xval": 1.0390625, "num_input_tokens_seen": 37185468, "step": 593 }, { "epoch": 1.9767054908485857, "grad_norm": 10.910758018493652, "learning_rate": 5e-06, "loss": 0.7212, "num_input_tokens_seen": 37245908, "step": 594 }, { "epoch": 1.9767054908485857, "loss": 0.6745211482048035, "loss_ce": 0.001547462772578001, "loss_iou": 0.18359375, "loss_num": 0.060791015625, "loss_xval": 0.671875, "num_input_tokens_seen": 37245908, "step": 594 }, { "epoch": 1.9800332778702163, "grad_norm": 11.645059585571289, "learning_rate": 5e-06, "loss": 0.9004, "num_input_tokens_seen": 37310236, "step": 595 }, { "epoch": 1.9800332778702163, "loss": 0.8556256294250488, "loss_ce": 0.006992822512984276, "loss_iou": 0.31640625, "loss_num": 0.043212890625, "loss_xval": 0.84765625, "num_input_tokens_seen": 37310236, "step": 595 }, { "epoch": 1.983361064891847, "grad_norm": 12.184340476989746, "learning_rate": 5e-06, "loss": 1.0324, "num_input_tokens_seen": 37371568, "step": 596 }, { "epoch": 1.983361064891847, "loss": 1.0166329145431519, "loss_ce": 0.0027778963558375835, "loss_iou": 0.33203125, "loss_num": 0.0703125, "loss_xval": 1.015625, "num_input_tokens_seen": 37371568, "step": 596 }, { "epoch": 1.9866888519134775, "grad_norm": 10.037638664245605, "learning_rate": 5e-06, "loss": 0.6938, "num_input_tokens_seen": 37434188, "step": 597 }, { "epoch": 1.9866888519134775, "loss": 0.8472949266433716, "loss_ce": 0.0015917927958071232, "loss_iou": 0.2197265625, "loss_num": 0.08154296875, "loss_xval": 0.84375, "num_input_tokens_seen": 37434188, "step": 597 }, { "epoch": 1.9900166389351082, "grad_norm": 28.653364181518555, "learning_rate": 5e-06, "loss": 1.3632, "num_input_tokens_seen": 37497236, "step": 598 }, { "epoch": 1.9900166389351082, "loss": 1.1080653667449951, "loss_ce": 0.00015516526764258742, "loss_iou": 0.369140625, "loss_num": 0.07421875, "loss_xval": 1.109375, "num_input_tokens_seen": 37497236, "step": 598 }, { "epoch": 1.9933444259567388, "grad_norm": 63.16008377075195, "learning_rate": 5e-06, "loss": 0.984, "num_input_tokens_seen": 37559724, "step": 599 }, { "epoch": 1.9933444259567388, "loss": 1.0816211700439453, "loss_ce": 0.00044438272016122937, "loss_iou": 0.359375, "loss_num": 0.072265625, "loss_xval": 1.078125, "num_input_tokens_seen": 37559724, "step": 599 }, { "epoch": 1.9966722129783694, "grad_norm": 12.790926933288574, "learning_rate": 5e-06, "loss": 1.2626, "num_input_tokens_seen": 37623232, "step": 600 }, { "epoch": 1.9966722129783694, "loss": 1.700850009918213, "loss_ce": 0.0013871309347450733, "loss_iou": 0.5078125, "loss_num": 0.13671875, "loss_xval": 1.703125, "num_input_tokens_seen": 37623232, "step": 600 }, { "epoch": 2.0, "grad_norm": 17.36669158935547, "learning_rate": 5e-06, "loss": 0.7352, "num_input_tokens_seen": 37686136, "step": 601 }, { "epoch": 2.0, "loss": 0.7832919359207153, "loss_ce": 0.0008212359971366823, "loss_iou": 0.205078125, "loss_num": 0.07470703125, "loss_xval": 0.78125, "num_input_tokens_seen": 37686136, "step": 601 }, { "epoch": 2.0033277870216306, "grad_norm": 18.40623664855957, "learning_rate": 5e-06, "loss": 1.1938, "num_input_tokens_seen": 37749472, "step": 602 }, { "epoch": 2.0033277870216306, "loss": 1.2827110290527344, "loss_ce": 0.00048443087143823504, "loss_iou": 0.447265625, "loss_num": 0.07763671875, "loss_xval": 1.28125, "num_input_tokens_seen": 37749472, "step": 602 }, { "epoch": 2.0066555740432612, "grad_norm": 12.205756187438965, "learning_rate": 5e-06, "loss": 0.5865, "num_input_tokens_seen": 37811236, "step": 603 }, { "epoch": 2.0066555740432612, "loss": 0.42705678939819336, "loss_ce": 0.00029895632178522646, "loss_iou": 0.0, "loss_num": 0.08544921875, "loss_xval": 0.42578125, "num_input_tokens_seen": 37811236, "step": 603 }, { "epoch": 2.009983361064892, "grad_norm": 10.160212516784668, "learning_rate": 5e-06, "loss": 0.5894, "num_input_tokens_seen": 37872200, "step": 604 }, { "epoch": 2.009983361064892, "loss": 0.7009243965148926, "loss_ce": 0.00024079754075501114, "loss_iou": 0.1689453125, "loss_num": 0.07275390625, "loss_xval": 0.69921875, "num_input_tokens_seen": 37872200, "step": 604 }, { "epoch": 2.0133111480865225, "grad_norm": 13.080687522888184, "learning_rate": 5e-06, "loss": 0.7835, "num_input_tokens_seen": 37933716, "step": 605 }, { "epoch": 2.0133111480865225, "loss": 1.0986953973770142, "loss_ce": 6.257939094211906e-05, "loss_iou": 0.3828125, "loss_num": 0.06640625, "loss_xval": 1.1015625, "num_input_tokens_seen": 37933716, "step": 605 }, { "epoch": 2.016638935108153, "grad_norm": 10.649163246154785, "learning_rate": 5e-06, "loss": 0.7663, "num_input_tokens_seen": 37994536, "step": 606 }, { "epoch": 2.016638935108153, "loss": 0.9843522310256958, "loss_ce": 0.0007096048793755472, "loss_iou": 0.275390625, "loss_num": 0.0869140625, "loss_xval": 0.984375, "num_input_tokens_seen": 37994536, "step": 606 }, { "epoch": 2.0199667221297837, "grad_norm": 10.702330589294434, "learning_rate": 5e-06, "loss": 0.6246, "num_input_tokens_seen": 38057508, "step": 607 }, { "epoch": 2.0199667221297837, "loss": 0.6482642889022827, "loss_ce": 0.0010474994778633118, "loss_iou": 0.2197265625, "loss_num": 0.041748046875, "loss_xval": 0.6484375, "num_input_tokens_seen": 38057508, "step": 607 }, { "epoch": 2.0232945091514143, "grad_norm": 12.19349479675293, "learning_rate": 5e-06, "loss": 0.7891, "num_input_tokens_seen": 38119664, "step": 608 }, { "epoch": 2.0232945091514143, "loss": 0.8228746652603149, "loss_ce": 0.0001207729583256878, "loss_iou": 0.2470703125, "loss_num": 0.06591796875, "loss_xval": 0.82421875, "num_input_tokens_seen": 38119664, "step": 608 }, { "epoch": 2.026622296173045, "grad_norm": 30.340932846069336, "learning_rate": 5e-06, "loss": 1.0127, "num_input_tokens_seen": 38184528, "step": 609 }, { "epoch": 2.026622296173045, "loss": 1.135079026222229, "loss_ce": 0.0008016878855414689, "loss_iou": 0.45703125, "loss_num": 0.0439453125, "loss_xval": 1.1328125, "num_input_tokens_seen": 38184528, "step": 609 }, { "epoch": 2.0299500831946755, "grad_norm": 19.663864135742188, "learning_rate": 5e-06, "loss": 1.0414, "num_input_tokens_seen": 38247428, "step": 610 }, { "epoch": 2.0299500831946755, "loss": 1.2719979286193848, "loss_ce": 0.00026946913567371666, "loss_iou": 0.478515625, "loss_num": 0.0634765625, "loss_xval": 1.2734375, "num_input_tokens_seen": 38247428, "step": 610 }, { "epoch": 2.033277870216306, "grad_norm": 38.78046798706055, "learning_rate": 5e-06, "loss": 0.6704, "num_input_tokens_seen": 38309384, "step": 611 }, { "epoch": 2.033277870216306, "loss": 0.9232205152511597, "loss_ce": 0.0001247778272954747, "loss_iou": 0.302734375, "loss_num": 0.0634765625, "loss_xval": 0.921875, "num_input_tokens_seen": 38309384, "step": 611 }, { "epoch": 2.0366056572379367, "grad_norm": 21.593708038330078, "learning_rate": 5e-06, "loss": 0.8872, "num_input_tokens_seen": 38373688, "step": 612 }, { "epoch": 2.0366056572379367, "loss": 0.8260167837142944, "loss_ce": 8.90689916559495e-05, "loss_iou": 0.291015625, "loss_num": 0.048828125, "loss_xval": 0.82421875, "num_input_tokens_seen": 38373688, "step": 612 }, { "epoch": 2.0399334442595674, "grad_norm": 28.829166412353516, "learning_rate": 5e-06, "loss": 1.195, "num_input_tokens_seen": 38435916, "step": 613 }, { "epoch": 2.0399334442595674, "loss": 1.4014410972595215, "loss_ce": 0.000562186527531594, "loss_iou": 0.47265625, "loss_num": 0.091796875, "loss_xval": 1.3984375, "num_input_tokens_seen": 38435916, "step": 613 }, { "epoch": 2.043261231281198, "grad_norm": 30.802501678466797, "learning_rate": 5e-06, "loss": 0.928, "num_input_tokens_seen": 38499540, "step": 614 }, { "epoch": 2.043261231281198, "loss": 1.0729480981826782, "loss_ce": 0.017283970490098, "loss_iou": 0.33984375, "loss_num": 0.0751953125, "loss_xval": 1.0546875, "num_input_tokens_seen": 38499540, "step": 614 }, { "epoch": 2.0465890183028286, "grad_norm": 14.069050788879395, "learning_rate": 5e-06, "loss": 0.9339, "num_input_tokens_seen": 38563296, "step": 615 }, { "epoch": 2.0465890183028286, "loss": 1.0433433055877686, "loss_ce": 0.01477888599038124, "loss_iou": 0.3984375, "loss_num": 0.046630859375, "loss_xval": 1.03125, "num_input_tokens_seen": 38563296, "step": 615 }, { "epoch": 2.049916805324459, "grad_norm": 8.753564834594727, "learning_rate": 5e-06, "loss": 0.71, "num_input_tokens_seen": 38624788, "step": 616 }, { "epoch": 2.049916805324459, "loss": 0.8744981288909912, "loss_ce": 0.006578153930604458, "loss_iou": 0.28125, "loss_num": 0.061279296875, "loss_xval": 0.8671875, "num_input_tokens_seen": 38624788, "step": 616 }, { "epoch": 2.05324459234609, "grad_norm": 21.490896224975586, "learning_rate": 5e-06, "loss": 1.1, "num_input_tokens_seen": 38689684, "step": 617 }, { "epoch": 2.05324459234609, "loss": 1.2144798040390015, "loss_ce": 0.0028098488692194223, "loss_iou": 0.4453125, "loss_num": 0.064453125, "loss_xval": 1.2109375, "num_input_tokens_seen": 38689684, "step": 617 }, { "epoch": 2.0565723793677204, "grad_norm": 14.222006797790527, "learning_rate": 5e-06, "loss": 0.6892, "num_input_tokens_seen": 38752120, "step": 618 }, { "epoch": 2.0565723793677204, "loss": 0.7122447490692139, "loss_ce": 0.0017955549992620945, "loss_iou": 0.2021484375, "loss_num": 0.0615234375, "loss_xval": 0.7109375, "num_input_tokens_seen": 38752120, "step": 618 }, { "epoch": 2.059900166389351, "grad_norm": 13.638813972473145, "learning_rate": 5e-06, "loss": 0.8149, "num_input_tokens_seen": 38812068, "step": 619 }, { "epoch": 2.059900166389351, "loss": 1.018181562423706, "loss_ce": 0.00011516415543155745, "loss_iou": 0.291015625, "loss_num": 0.087890625, "loss_xval": 1.015625, "num_input_tokens_seen": 38812068, "step": 619 }, { "epoch": 2.0632279534109816, "grad_norm": 16.652631759643555, "learning_rate": 5e-06, "loss": 0.7474, "num_input_tokens_seen": 38874664, "step": 620 }, { "epoch": 2.0632279534109816, "loss": 0.42710453271865845, "loss_ce": 0.00028570188442245126, "loss_iou": 0.09228515625, "loss_num": 0.048583984375, "loss_xval": 0.427734375, "num_input_tokens_seen": 38874664, "step": 620 }, { "epoch": 2.0665557404326123, "grad_norm": 22.291889190673828, "learning_rate": 5e-06, "loss": 0.9719, "num_input_tokens_seen": 38938248, "step": 621 }, { "epoch": 2.0665557404326123, "loss": 0.9289200305938721, "loss_ce": 0.00020914892957080156, "loss_iou": 0.318359375, "loss_num": 0.05810546875, "loss_xval": 0.9296875, "num_input_tokens_seen": 38938248, "step": 621 }, { "epoch": 2.069883527454243, "grad_norm": 21.66976547241211, "learning_rate": 5e-06, "loss": 0.6807, "num_input_tokens_seen": 39000808, "step": 622 }, { "epoch": 2.069883527454243, "loss": 0.7833365201950073, "loss_ce": 0.0003775203076656908, "loss_iou": 0.267578125, "loss_num": 0.0498046875, "loss_xval": 0.78125, "num_input_tokens_seen": 39000808, "step": 622 }, { "epoch": 2.0732113144758735, "grad_norm": 13.521284103393555, "learning_rate": 5e-06, "loss": 0.9122, "num_input_tokens_seen": 39064556, "step": 623 }, { "epoch": 2.0732113144758735, "loss": 0.8654112815856934, "loss_ce": 0.0004210532642900944, "loss_iou": 0.318359375, "loss_num": 0.04541015625, "loss_xval": 0.86328125, "num_input_tokens_seen": 39064556, "step": 623 }, { "epoch": 2.076539101497504, "grad_norm": 9.097267150878906, "learning_rate": 5e-06, "loss": 0.7518, "num_input_tokens_seen": 39127724, "step": 624 }, { "epoch": 2.076539101497504, "loss": 0.8574149012565613, "loss_ce": 0.0007254519150592387, "loss_iou": 0.2734375, "loss_num": 0.0615234375, "loss_xval": 0.85546875, "num_input_tokens_seen": 39127724, "step": 624 }, { "epoch": 2.0798668885191347, "grad_norm": 62.03600311279297, "learning_rate": 5e-06, "loss": 0.779, "num_input_tokens_seen": 39191480, "step": 625 }, { "epoch": 2.0798668885191347, "loss": 0.8742235898971558, "loss_ce": 7.806658686604351e-05, "loss_iou": 0.296875, "loss_num": 0.05615234375, "loss_xval": 0.875, "num_input_tokens_seen": 39191480, "step": 625 }, { "epoch": 2.0831946755407653, "grad_norm": 37.185428619384766, "learning_rate": 5e-06, "loss": 0.7275, "num_input_tokens_seen": 39252676, "step": 626 }, { "epoch": 2.0831946755407653, "loss": 0.7918392419815063, "loss_ce": 0.0015560202300548553, "loss_iou": 0.234375, "loss_num": 0.064453125, "loss_xval": 0.7890625, "num_input_tokens_seen": 39252676, "step": 626 }, { "epoch": 2.086522462562396, "grad_norm": 13.905028343200684, "learning_rate": 5e-06, "loss": 0.5983, "num_input_tokens_seen": 39314092, "step": 627 }, { "epoch": 2.086522462562396, "loss": 0.29983216524124146, "loss_ce": 0.00045470561599358916, "loss_iou": 0.0, "loss_num": 0.059814453125, "loss_xval": 0.298828125, "num_input_tokens_seen": 39314092, "step": 627 }, { "epoch": 2.0898502495840265, "grad_norm": 21.244510650634766, "learning_rate": 5e-06, "loss": 0.8911, "num_input_tokens_seen": 39377644, "step": 628 }, { "epoch": 2.0898502495840265, "loss": 0.9056867361068726, "loss_ce": 0.00016913384024519473, "loss_iou": 0.3359375, "loss_num": 0.046630859375, "loss_xval": 0.90625, "num_input_tokens_seen": 39377644, "step": 628 }, { "epoch": 2.093178036605657, "grad_norm": 20.721181869506836, "learning_rate": 5e-06, "loss": 1.0455, "num_input_tokens_seen": 39441688, "step": 629 }, { "epoch": 2.093178036605657, "loss": 1.0174716711044312, "loss_ce": 0.004288055002689362, "loss_iou": 0.328125, "loss_num": 0.07177734375, "loss_xval": 1.015625, "num_input_tokens_seen": 39441688, "step": 629 }, { "epoch": 2.0965058236272878, "grad_norm": 12.86941909790039, "learning_rate": 5e-06, "loss": 0.7168, "num_input_tokens_seen": 39503164, "step": 630 }, { "epoch": 2.0965058236272878, "loss": 0.9291834831237793, "loss_ce": 0.0034022473264485598, "loss_iou": 0.330078125, "loss_num": 0.05322265625, "loss_xval": 0.92578125, "num_input_tokens_seen": 39503164, "step": 630 }, { "epoch": 2.0998336106489184, "grad_norm": 10.400750160217285, "learning_rate": 5e-06, "loss": 0.8992, "num_input_tokens_seen": 39566940, "step": 631 }, { "epoch": 2.0998336106489184, "loss": 0.94353187084198, "loss_ce": 0.001393245765939355, "loss_iou": 0.349609375, "loss_num": 0.048095703125, "loss_xval": 0.94140625, "num_input_tokens_seen": 39566940, "step": 631 }, { "epoch": 2.103161397670549, "grad_norm": 9.461636543273926, "learning_rate": 5e-06, "loss": 0.6036, "num_input_tokens_seen": 39629024, "step": 632 }, { "epoch": 2.103161397670549, "loss": 0.33993566036224365, "loss_ce": 0.006317485123872757, "loss_iou": 0.06982421875, "loss_num": 0.038818359375, "loss_xval": 0.333984375, "num_input_tokens_seen": 39629024, "step": 632 }, { "epoch": 2.1064891846921796, "grad_norm": 12.25512981414795, "learning_rate": 5e-06, "loss": 0.6903, "num_input_tokens_seen": 39691612, "step": 633 }, { "epoch": 2.1064891846921796, "loss": 0.667500376701355, "loss_ce": 1.9936600438086316e-05, "loss_iou": 0.2314453125, "loss_num": 0.041015625, "loss_xval": 0.66796875, "num_input_tokens_seen": 39691612, "step": 633 }, { "epoch": 2.10981697171381, "grad_norm": 12.845260620117188, "learning_rate": 5e-06, "loss": 1.0146, "num_input_tokens_seen": 39753308, "step": 634 }, { "epoch": 2.10981697171381, "loss": 0.9783458113670349, "loss_ce": 0.0008067445596680045, "loss_iou": 0.271484375, "loss_num": 0.08642578125, "loss_xval": 0.9765625, "num_input_tokens_seen": 39753308, "step": 634 }, { "epoch": 2.113144758735441, "grad_norm": 12.062701225280762, "learning_rate": 5e-06, "loss": 0.9782, "num_input_tokens_seen": 39815888, "step": 635 }, { "epoch": 2.113144758735441, "loss": 1.166348934173584, "loss_ce": 0.0003332831838633865, "loss_iou": 0.291015625, "loss_num": 0.11669921875, "loss_xval": 1.1640625, "num_input_tokens_seen": 39815888, "step": 635 }, { "epoch": 2.1164725457570714, "grad_norm": 13.48031234741211, "learning_rate": 5e-06, "loss": 0.9052, "num_input_tokens_seen": 39877392, "step": 636 }, { "epoch": 2.1164725457570714, "loss": 0.8594918251037598, "loss_ce": 0.0008491812041029334, "loss_iou": 0.21484375, "loss_num": 0.0859375, "loss_xval": 0.859375, "num_input_tokens_seen": 39877392, "step": 636 }, { "epoch": 2.119800332778702, "grad_norm": 11.958724975585938, "learning_rate": 5e-06, "loss": 0.8913, "num_input_tokens_seen": 39940516, "step": 637 }, { "epoch": 2.119800332778702, "loss": 0.792966902256012, "loss_ce": 0.009763746522367, "loss_iou": 0.2392578125, "loss_num": 0.06103515625, "loss_xval": 0.78125, "num_input_tokens_seen": 39940516, "step": 637 }, { "epoch": 2.1231281198003327, "grad_norm": 15.40645694732666, "learning_rate": 5e-06, "loss": 0.718, "num_input_tokens_seen": 40003388, "step": 638 }, { "epoch": 2.1231281198003327, "loss": 0.7531487941741943, "loss_ce": 0.00021912308875471354, "loss_iou": 0.28125, "loss_num": 0.0380859375, "loss_xval": 0.75390625, "num_input_tokens_seen": 40003388, "step": 638 }, { "epoch": 2.1264559068219633, "grad_norm": 32.53299331665039, "learning_rate": 5e-06, "loss": 1.1154, "num_input_tokens_seen": 40066328, "step": 639 }, { "epoch": 2.1264559068219633, "loss": 1.099750280380249, "loss_ce": 0.0001409617834724486, "loss_iou": 0.396484375, "loss_num": 0.061279296875, "loss_xval": 1.1015625, "num_input_tokens_seen": 40066328, "step": 639 }, { "epoch": 2.129783693843594, "grad_norm": 33.41072082519531, "learning_rate": 5e-06, "loss": 0.8616, "num_input_tokens_seen": 40127316, "step": 640 }, { "epoch": 2.129783693843594, "loss": 0.9770931601524353, "loss_ce": 0.0015072214882820845, "loss_iou": 0.28125, "loss_num": 0.0830078125, "loss_xval": 0.9765625, "num_input_tokens_seen": 40127316, "step": 640 }, { "epoch": 2.1331114808652245, "grad_norm": 13.928618431091309, "learning_rate": 5e-06, "loss": 1.0466, "num_input_tokens_seen": 40190528, "step": 641 }, { "epoch": 2.1331114808652245, "loss": 1.031181812286377, "loss_ce": 0.0015187868848443031, "loss_iou": 0.341796875, "loss_num": 0.0693359375, "loss_xval": 1.03125, "num_input_tokens_seen": 40190528, "step": 641 }, { "epoch": 2.136439267886855, "grad_norm": 19.280794143676758, "learning_rate": 5e-06, "loss": 0.9631, "num_input_tokens_seen": 40252724, "step": 642 }, { "epoch": 2.136439267886855, "loss": 0.7468453645706177, "loss_ce": 0.0005074806977063417, "loss_iou": 0.26171875, "loss_num": 0.04443359375, "loss_xval": 0.74609375, "num_input_tokens_seen": 40252724, "step": 642 }, { "epoch": 2.1397670549084857, "grad_norm": 9.668257713317871, "learning_rate": 5e-06, "loss": 0.8991, "num_input_tokens_seen": 40315860, "step": 643 }, { "epoch": 2.1397670549084857, "loss": 0.768240213394165, "loss_ce": 0.0001738157297950238, "loss_iou": 0.30859375, "loss_num": 0.0302734375, "loss_xval": 0.76953125, "num_input_tokens_seen": 40315860, "step": 643 }, { "epoch": 2.1430948419301163, "grad_norm": 16.013214111328125, "learning_rate": 5e-06, "loss": 0.97, "num_input_tokens_seen": 40379176, "step": 644 }, { "epoch": 2.1430948419301163, "loss": 0.7990410327911377, "loss_ce": 0.001677798223681748, "loss_iou": 0.2353515625, "loss_num": 0.0654296875, "loss_xval": 0.796875, "num_input_tokens_seen": 40379176, "step": 644 }, { "epoch": 2.146422628951747, "grad_norm": 8.998737335205078, "learning_rate": 5e-06, "loss": 0.8889, "num_input_tokens_seen": 40440688, "step": 645 }, { "epoch": 2.146422628951747, "loss": 1.1174098253250122, "loss_ce": 0.00022229723981581628, "loss_iou": 0.2490234375, "loss_num": 0.1240234375, "loss_xval": 1.1171875, "num_input_tokens_seen": 40440688, "step": 645 }, { "epoch": 2.1497504159733776, "grad_norm": 12.142661094665527, "learning_rate": 5e-06, "loss": 0.7721, "num_input_tokens_seen": 40502032, "step": 646 }, { "epoch": 2.1497504159733776, "loss": 0.9419033527374268, "loss_ce": 8.781585165706929e-06, "loss_iou": 0.255859375, "loss_num": 0.08544921875, "loss_xval": 0.94140625, "num_input_tokens_seen": 40502032, "step": 646 }, { "epoch": 2.153078202995008, "grad_norm": 12.413509368896484, "learning_rate": 5e-06, "loss": 0.6306, "num_input_tokens_seen": 40563216, "step": 647 }, { "epoch": 2.153078202995008, "loss": 0.6236965656280518, "loss_ce": 0.0004055765166413039, "loss_iou": 0.15625, "loss_num": 0.062255859375, "loss_xval": 0.625, "num_input_tokens_seen": 40563216, "step": 647 }, { "epoch": 2.156405990016639, "grad_norm": 12.308266639709473, "learning_rate": 5e-06, "loss": 1.0149, "num_input_tokens_seen": 40626760, "step": 648 }, { "epoch": 2.156405990016639, "loss": 1.0296001434326172, "loss_ce": 0.0006693830946460366, "loss_iou": 0.328125, "loss_num": 0.07421875, "loss_xval": 1.03125, "num_input_tokens_seen": 40626760, "step": 648 }, { "epoch": 2.1597337770382694, "grad_norm": 8.219746589660645, "learning_rate": 5e-06, "loss": 1.0916, "num_input_tokens_seen": 40689820, "step": 649 }, { "epoch": 2.1597337770382694, "loss": 1.043705940246582, "loss_ce": 0.0009814061922952533, "loss_iou": 0.333984375, "loss_num": 0.07470703125, "loss_xval": 1.0390625, "num_input_tokens_seen": 40689820, "step": 649 }, { "epoch": 2.1630615640599, "grad_norm": 16.483530044555664, "learning_rate": 5e-06, "loss": 1.3246, "num_input_tokens_seen": 40753476, "step": 650 }, { "epoch": 2.1630615640599, "loss": 1.3722883462905884, "loss_ce": 0.0011946188751608133, "loss_iou": 0.466796875, "loss_num": 0.08740234375, "loss_xval": 1.375, "num_input_tokens_seen": 40753476, "step": 650 }, { "epoch": 2.1663893510815306, "grad_norm": 15.433051109313965, "learning_rate": 5e-06, "loss": 0.8102, "num_input_tokens_seen": 40816652, "step": 651 }, { "epoch": 2.1663893510815306, "loss": 0.710527241230011, "loss_ce": 0.0022752864751964808, "loss_iou": 0.251953125, "loss_num": 0.040771484375, "loss_xval": 0.70703125, "num_input_tokens_seen": 40816652, "step": 651 }, { "epoch": 2.1697171381031612, "grad_norm": 6.271609306335449, "learning_rate": 5e-06, "loss": 0.4691, "num_input_tokens_seen": 40877216, "step": 652 }, { "epoch": 2.1697171381031612, "loss": 0.44094425439834595, "loss_ce": 2.6259316655341536e-05, "loss_iou": 0.07275390625, "loss_num": 0.05908203125, "loss_xval": 0.44140625, "num_input_tokens_seen": 40877216, "step": 652 }, { "epoch": 2.173044925124792, "grad_norm": 13.27111530303955, "learning_rate": 5e-06, "loss": 0.815, "num_input_tokens_seen": 40938780, "step": 653 }, { "epoch": 2.173044925124792, "loss": 0.9975978136062622, "loss_ce": 0.000771641731262207, "loss_iou": 0.283203125, "loss_num": 0.0859375, "loss_xval": 0.99609375, "num_input_tokens_seen": 40938780, "step": 653 }, { "epoch": 2.1763727121464225, "grad_norm": 13.250811576843262, "learning_rate": 5e-06, "loss": 0.8086, "num_input_tokens_seen": 41002692, "step": 654 }, { "epoch": 2.1763727121464225, "loss": 0.5700994729995728, "loss_ce": 0.00027528181090019643, "loss_iou": 0.15234375, "loss_num": 0.052978515625, "loss_xval": 0.5703125, "num_input_tokens_seen": 41002692, "step": 654 }, { "epoch": 2.179700499168053, "grad_norm": 43.69986343383789, "learning_rate": 5e-06, "loss": 1.2292, "num_input_tokens_seen": 41065588, "step": 655 }, { "epoch": 2.179700499168053, "loss": 1.126779317855835, "loss_ce": 0.0010468140244483948, "loss_iou": 0.38671875, "loss_num": 0.07080078125, "loss_xval": 1.125, "num_input_tokens_seen": 41065588, "step": 655 }, { "epoch": 2.1830282861896837, "grad_norm": 23.3741455078125, "learning_rate": 5e-06, "loss": 0.7644, "num_input_tokens_seen": 41127908, "step": 656 }, { "epoch": 2.1830282861896837, "loss": 0.8155651092529297, "loss_ce": 0.0005016563227400184, "loss_iou": 0.2373046875, "loss_num": 0.06787109375, "loss_xval": 0.81640625, "num_input_tokens_seen": 41127908, "step": 656 }, { "epoch": 2.1863560732113143, "grad_norm": 16.306617736816406, "learning_rate": 5e-06, "loss": 1.0647, "num_input_tokens_seen": 41190336, "step": 657 }, { "epoch": 2.1863560732113143, "loss": 1.1568994522094727, "loss_ce": 0.00016120978398248553, "loss_iou": 0.3828125, "loss_num": 0.07763671875, "loss_xval": 1.15625, "num_input_tokens_seen": 41190336, "step": 657 }, { "epoch": 2.189683860232945, "grad_norm": 11.455361366271973, "learning_rate": 5e-06, "loss": 0.7928, "num_input_tokens_seen": 41252080, "step": 658 }, { "epoch": 2.189683860232945, "loss": 0.7935343980789185, "loss_ce": 0.0005656481371261179, "loss_iou": 0.220703125, "loss_num": 0.0703125, "loss_xval": 0.79296875, "num_input_tokens_seen": 41252080, "step": 658 }, { "epoch": 2.1930116472545755, "grad_norm": 14.715596199035645, "learning_rate": 5e-06, "loss": 0.9227, "num_input_tokens_seen": 41314652, "step": 659 }, { "epoch": 2.1930116472545755, "loss": 0.932908833026886, "loss_ce": 4.7503406676696613e-05, "loss_iou": 0.302734375, "loss_num": 0.0654296875, "loss_xval": 0.93359375, "num_input_tokens_seen": 41314652, "step": 659 }, { "epoch": 2.196339434276206, "grad_norm": 31.348485946655273, "learning_rate": 5e-06, "loss": 0.9761, "num_input_tokens_seen": 41377348, "step": 660 }, { "epoch": 2.196339434276206, "loss": 0.9302453398704529, "loss_ce": 6.955982826184481e-05, "loss_iou": 0.314453125, "loss_num": 0.060302734375, "loss_xval": 0.9296875, "num_input_tokens_seen": 41377348, "step": 660 }, { "epoch": 2.1996672212978368, "grad_norm": 35.344242095947266, "learning_rate": 5e-06, "loss": 1.0837, "num_input_tokens_seen": 41440992, "step": 661 }, { "epoch": 2.1996672212978368, "loss": 0.8363819122314453, "loss_ce": 0.00020026677520945668, "loss_iou": 0.283203125, "loss_num": 0.0537109375, "loss_xval": 0.8359375, "num_input_tokens_seen": 41440992, "step": 661 }, { "epoch": 2.2029950083194674, "grad_norm": 26.1990909576416, "learning_rate": 5e-06, "loss": 0.8925, "num_input_tokens_seen": 41504132, "step": 662 }, { "epoch": 2.2029950083194674, "loss": 0.7424072027206421, "loss_ce": 0.00046388505143113434, "loss_iou": 0.244140625, "loss_num": 0.05078125, "loss_xval": 0.7421875, "num_input_tokens_seen": 41504132, "step": 662 }, { "epoch": 2.206322795341098, "grad_norm": 9.237807273864746, "learning_rate": 5e-06, "loss": 0.6939, "num_input_tokens_seen": 41565620, "step": 663 }, { "epoch": 2.206322795341098, "loss": 0.7877798080444336, "loss_ce": 6.009737990098074e-05, "loss_iou": 0.205078125, "loss_num": 0.07568359375, "loss_xval": 0.7890625, "num_input_tokens_seen": 41565620, "step": 663 }, { "epoch": 2.2096505823627286, "grad_norm": 15.479573249816895, "learning_rate": 5e-06, "loss": 1.0272, "num_input_tokens_seen": 41629236, "step": 664 }, { "epoch": 2.2096505823627286, "loss": 1.0712847709655762, "loss_ce": 0.00048389771836809814, "loss_iou": 0.33203125, "loss_num": 0.0810546875, "loss_xval": 1.0703125, "num_input_tokens_seen": 41629236, "step": 664 }, { "epoch": 2.212978369384359, "grad_norm": 32.5822868347168, "learning_rate": 5e-06, "loss": 0.9038, "num_input_tokens_seen": 41692904, "step": 665 }, { "epoch": 2.212978369384359, "loss": 0.9757400751113892, "loss_ce": 0.0011306863743811846, "loss_iou": 0.2890625, "loss_num": 0.07958984375, "loss_xval": 0.9765625, "num_input_tokens_seen": 41692904, "step": 665 }, { "epoch": 2.21630615640599, "grad_norm": 15.892915725708008, "learning_rate": 5e-06, "loss": 0.9478, "num_input_tokens_seen": 41755028, "step": 666 }, { "epoch": 2.21630615640599, "loss": 1.2628767490386963, "loss_ce": 0.0004256761458236724, "loss_iou": 0.42578125, "loss_num": 0.08251953125, "loss_xval": 1.265625, "num_input_tokens_seen": 41755028, "step": 666 }, { "epoch": 2.2196339434276204, "grad_norm": 32.91965103149414, "learning_rate": 5e-06, "loss": 1.0484, "num_input_tokens_seen": 41819808, "step": 667 }, { "epoch": 2.2196339434276204, "loss": 1.0486059188842773, "loss_ce": 0.001242599100805819, "loss_iou": 0.34765625, "loss_num": 0.07080078125, "loss_xval": 1.046875, "num_input_tokens_seen": 41819808, "step": 667 }, { "epoch": 2.222961730449251, "grad_norm": 6.644551753997803, "learning_rate": 5e-06, "loss": 0.8515, "num_input_tokens_seen": 41883088, "step": 668 }, { "epoch": 2.222961730449251, "loss": 0.8012328147888184, "loss_ce": 0.00032947398722171783, "loss_iou": 0.205078125, "loss_num": 0.078125, "loss_xval": 0.80078125, "num_input_tokens_seen": 41883088, "step": 668 }, { "epoch": 2.2262895174708817, "grad_norm": 19.913354873657227, "learning_rate": 5e-06, "loss": 0.9162, "num_input_tokens_seen": 41946620, "step": 669 }, { "epoch": 2.2262895174708817, "loss": 0.6975362300872803, "loss_ce": 0.0007588434964418411, "loss_iou": 0.236328125, "loss_num": 0.04443359375, "loss_xval": 0.6953125, "num_input_tokens_seen": 41946620, "step": 669 }, { "epoch": 2.2296173044925123, "grad_norm": 25.868154525756836, "learning_rate": 5e-06, "loss": 0.8179, "num_input_tokens_seen": 42009140, "step": 670 }, { "epoch": 2.2296173044925123, "loss": 0.7486177086830139, "loss_ce": 8.254170825239271e-05, "loss_iou": 0.275390625, "loss_num": 0.03955078125, "loss_xval": 0.75, "num_input_tokens_seen": 42009140, "step": 670 }, { "epoch": 2.232945091514143, "grad_norm": 16.261491775512695, "learning_rate": 5e-06, "loss": 0.8723, "num_input_tokens_seen": 42069640, "step": 671 }, { "epoch": 2.232945091514143, "loss": 1.1612319946289062, "loss_ce": 0.0005875469068996608, "loss_iou": 0.361328125, "loss_num": 0.087890625, "loss_xval": 1.1640625, "num_input_tokens_seen": 42069640, "step": 671 }, { "epoch": 2.2362728785357735, "grad_norm": 9.516688346862793, "learning_rate": 5e-06, "loss": 0.8249, "num_input_tokens_seen": 42132272, "step": 672 }, { "epoch": 2.2362728785357735, "loss": 0.815399706363678, "loss_ce": 0.0001836877636378631, "loss_iou": 0.3046875, "loss_num": 0.040771484375, "loss_xval": 0.81640625, "num_input_tokens_seen": 42132272, "step": 672 }, { "epoch": 2.239600665557404, "grad_norm": 12.779966354370117, "learning_rate": 5e-06, "loss": 0.7785, "num_input_tokens_seen": 42194892, "step": 673 }, { "epoch": 2.239600665557404, "loss": 0.6607878804206848, "loss_ce": 0.00014333476428873837, "loss_iou": 0.15625, "loss_num": 0.0693359375, "loss_xval": 0.66015625, "num_input_tokens_seen": 42194892, "step": 673 }, { "epoch": 2.2429284525790347, "grad_norm": 10.946259498596191, "learning_rate": 5e-06, "loss": 0.7948, "num_input_tokens_seen": 42256156, "step": 674 }, { "epoch": 2.2429284525790347, "loss": 0.764301061630249, "loss_ce": 0.0003850538341794163, "loss_iou": 0.232421875, "loss_num": 0.06005859375, "loss_xval": 0.765625, "num_input_tokens_seen": 42256156, "step": 674 }, { "epoch": 2.2462562396006653, "grad_norm": 9.564322471618652, "learning_rate": 5e-06, "loss": 0.6524, "num_input_tokens_seen": 42318292, "step": 675 }, { "epoch": 2.2462562396006653, "loss": 0.6682754158973694, "loss_ce": 6.24852254986763e-05, "loss_iou": 0.234375, "loss_num": 0.0400390625, "loss_xval": 0.66796875, "num_input_tokens_seen": 42318292, "step": 675 }, { "epoch": 2.249584026622296, "grad_norm": 13.924878120422363, "learning_rate": 5e-06, "loss": 0.8124, "num_input_tokens_seen": 42377848, "step": 676 }, { "epoch": 2.249584026622296, "loss": 0.6967849731445312, "loss_ce": 0.0017471231985837221, "loss_iou": 0.212890625, "loss_num": 0.0537109375, "loss_xval": 0.6953125, "num_input_tokens_seen": 42377848, "step": 676 }, { "epoch": 2.2529118136439266, "grad_norm": 16.81814956665039, "learning_rate": 5e-06, "loss": 0.902, "num_input_tokens_seen": 42439512, "step": 677 }, { "epoch": 2.2529118136439266, "loss": 0.8860781788825989, "loss_ce": 0.00033597912988625467, "loss_iou": 0.279296875, "loss_num": 0.06591796875, "loss_xval": 0.88671875, "num_input_tokens_seen": 42439512, "step": 677 }, { "epoch": 2.256239600665557, "grad_norm": 11.292144775390625, "learning_rate": 5e-06, "loss": 1.0798, "num_input_tokens_seen": 42502844, "step": 678 }, { "epoch": 2.256239600665557, "loss": 1.1715800762176514, "loss_ce": 0.00019335387332830578, "loss_iou": 0.392578125, "loss_num": 0.0771484375, "loss_xval": 1.171875, "num_input_tokens_seen": 42502844, "step": 678 }, { "epoch": 2.259567387687188, "grad_norm": 7.578566551208496, "learning_rate": 5e-06, "loss": 1.047, "num_input_tokens_seen": 42565372, "step": 679 }, { "epoch": 2.259567387687188, "loss": 1.0372211933135986, "loss_ce": 0.0010883715003728867, "loss_iou": 0.365234375, "loss_num": 0.0615234375, "loss_xval": 1.0390625, "num_input_tokens_seen": 42565372, "step": 679 }, { "epoch": 2.2628951747088184, "grad_norm": 17.18895721435547, "learning_rate": 5e-06, "loss": 0.9563, "num_input_tokens_seen": 42627684, "step": 680 }, { "epoch": 2.2628951747088184, "loss": 1.0924670696258545, "loss_ce": 0.0006701992242597044, "loss_iou": 0.34375, "loss_num": 0.08056640625, "loss_xval": 1.09375, "num_input_tokens_seen": 42627684, "step": 680 }, { "epoch": 2.266222961730449, "grad_norm": 9.680091857910156, "learning_rate": 5e-06, "loss": 1.0404, "num_input_tokens_seen": 42691788, "step": 681 }, { "epoch": 2.266222961730449, "loss": 0.8576643466949463, "loss_ce": 0.00048660385073162615, "loss_iou": 0.310546875, "loss_num": 0.046875, "loss_xval": 0.85546875, "num_input_tokens_seen": 42691788, "step": 681 }, { "epoch": 2.2695507487520796, "grad_norm": 23.31703758239746, "learning_rate": 5e-06, "loss": 0.8207, "num_input_tokens_seen": 42754224, "step": 682 }, { "epoch": 2.2695507487520796, "loss": 0.6044555902481079, "loss_ce": 8.549916674382985e-05, "loss_iou": 0.1826171875, "loss_num": 0.0478515625, "loss_xval": 0.60546875, "num_input_tokens_seen": 42754224, "step": 682 }, { "epoch": 2.2728785357737102, "grad_norm": 10.370177268981934, "learning_rate": 5e-06, "loss": 0.9969, "num_input_tokens_seen": 42817536, "step": 683 }, { "epoch": 2.2728785357737102, "loss": 1.0521934032440186, "loss_ce": 0.0008017598884180188, "loss_iou": 0.310546875, "loss_num": 0.08544921875, "loss_xval": 1.0546875, "num_input_tokens_seen": 42817536, "step": 683 }, { "epoch": 2.276206322795341, "grad_norm": 11.924843788146973, "learning_rate": 5e-06, "loss": 1.1041, "num_input_tokens_seen": 42880584, "step": 684 }, { "epoch": 2.276206322795341, "loss": 0.924105703830719, "loss_ce": 0.0005217483267188072, "loss_iou": 0.287109375, "loss_num": 0.0703125, "loss_xval": 0.921875, "num_input_tokens_seen": 42880584, "step": 684 }, { "epoch": 2.2795341098169715, "grad_norm": 12.28469467163086, "learning_rate": 5e-06, "loss": 0.7665, "num_input_tokens_seen": 42943528, "step": 685 }, { "epoch": 2.2795341098169715, "loss": 0.8234906196594238, "loss_ce": 0.00012639828491955996, "loss_iou": 0.267578125, "loss_num": 0.05712890625, "loss_xval": 0.82421875, "num_input_tokens_seen": 42943528, "step": 685 }, { "epoch": 2.2828618968386025, "grad_norm": 38.641693115234375, "learning_rate": 5e-06, "loss": 0.9702, "num_input_tokens_seen": 43007544, "step": 686 }, { "epoch": 2.2828618968386025, "loss": 0.8214001655578613, "loss_ce": 0.00011106484453193843, "loss_iou": 0.306640625, "loss_num": 0.0419921875, "loss_xval": 0.8203125, "num_input_tokens_seen": 43007544, "step": 686 }, { "epoch": 2.286189683860233, "grad_norm": 22.426542282104492, "learning_rate": 5e-06, "loss": 0.6703, "num_input_tokens_seen": 43069176, "step": 687 }, { "epoch": 2.286189683860233, "loss": 0.6367377042770386, "loss_ce": 0.0007513945456594229, "loss_iou": 0.2216796875, "loss_num": 0.03857421875, "loss_xval": 0.63671875, "num_input_tokens_seen": 43069176, "step": 687 }, { "epoch": 2.2895174708818637, "grad_norm": 27.776212692260742, "learning_rate": 5e-06, "loss": 0.798, "num_input_tokens_seen": 43133544, "step": 688 }, { "epoch": 2.2895174708818637, "loss": 0.882362961769104, "loss_ce": 0.00016075739404186606, "loss_iou": 0.31640625, "loss_num": 0.050048828125, "loss_xval": 0.8828125, "num_input_tokens_seen": 43133544, "step": 688 }, { "epoch": 2.2928452579034944, "grad_norm": 9.201436996459961, "learning_rate": 5e-06, "loss": 1.0724, "num_input_tokens_seen": 43197028, "step": 689 }, { "epoch": 2.2928452579034944, "loss": 1.2933428287506104, "loss_ce": 0.001350653124973178, "loss_iou": 0.453125, "loss_num": 0.07666015625, "loss_xval": 1.2890625, "num_input_tokens_seen": 43197028, "step": 689 }, { "epoch": 2.296173044925125, "grad_norm": 11.568137168884277, "learning_rate": 5e-06, "loss": 0.728, "num_input_tokens_seen": 43260444, "step": 690 }, { "epoch": 2.296173044925125, "loss": 0.6836668252944946, "loss_ce": 0.001293812645599246, "loss_iou": 0.2470703125, "loss_num": 0.03759765625, "loss_xval": 0.68359375, "num_input_tokens_seen": 43260444, "step": 690 }, { "epoch": 2.2995008319467556, "grad_norm": 14.878931999206543, "learning_rate": 5e-06, "loss": 1.0874, "num_input_tokens_seen": 43324336, "step": 691 }, { "epoch": 2.2995008319467556, "loss": 1.3032047748565674, "loss_ce": 0.0009586900705471635, "loss_iou": 0.3984375, "loss_num": 0.1005859375, "loss_xval": 1.3046875, "num_input_tokens_seen": 43324336, "step": 691 }, { "epoch": 2.302828618968386, "grad_norm": 15.7121000289917, "learning_rate": 5e-06, "loss": 0.6999, "num_input_tokens_seen": 43386868, "step": 692 }, { "epoch": 2.302828618968386, "loss": 0.6661006212234497, "loss_ce": 0.00032910649315454066, "loss_iou": 0.220703125, "loss_num": 0.044677734375, "loss_xval": 0.6640625, "num_input_tokens_seen": 43386868, "step": 692 }, { "epoch": 2.306156405990017, "grad_norm": 10.086871147155762, "learning_rate": 5e-06, "loss": 0.7448, "num_input_tokens_seen": 43449464, "step": 693 }, { "epoch": 2.306156405990017, "loss": 0.49396997690200806, "loss_ce": 0.0011721242917701602, "loss_iou": 0.1337890625, "loss_num": 0.044921875, "loss_xval": 0.4921875, "num_input_tokens_seen": 43449464, "step": 693 }, { "epoch": 2.3094841930116474, "grad_norm": 29.50054931640625, "learning_rate": 5e-06, "loss": 0.9566, "num_input_tokens_seen": 43513148, "step": 694 }, { "epoch": 2.3094841930116474, "loss": 1.0682692527770996, "loss_ce": 0.0008863506955094635, "loss_iou": 0.3671875, "loss_num": 0.06640625, "loss_xval": 1.0703125, "num_input_tokens_seen": 43513148, "step": 694 }, { "epoch": 2.312811980033278, "grad_norm": 11.244392395019531, "learning_rate": 5e-06, "loss": 0.7949, "num_input_tokens_seen": 43572220, "step": 695 }, { "epoch": 2.312811980033278, "loss": 0.8442493677139282, "loss_ce": 0.00025524571537971497, "loss_iou": 0.275390625, "loss_num": 0.05859375, "loss_xval": 0.84375, "num_input_tokens_seen": 43572220, "step": 695 }, { "epoch": 2.3161397670549086, "grad_norm": 34.46859359741211, "learning_rate": 5e-06, "loss": 0.8627, "num_input_tokens_seen": 43635076, "step": 696 }, { "epoch": 2.3161397670549086, "loss": 0.9726455211639404, "loss_ce": 0.00023341998166870326, "loss_iou": 0.31640625, "loss_num": 0.06787109375, "loss_xval": 0.97265625, "num_input_tokens_seen": 43635076, "step": 696 }, { "epoch": 2.3194675540765393, "grad_norm": 32.053955078125, "learning_rate": 5e-06, "loss": 1.0028, "num_input_tokens_seen": 43698032, "step": 697 }, { "epoch": 2.3194675540765393, "loss": 0.7314814925193787, "loss_ce": 3.620800271164626e-05, "loss_iou": 0.271484375, "loss_num": 0.037353515625, "loss_xval": 0.73046875, "num_input_tokens_seen": 43698032, "step": 697 }, { "epoch": 2.32279534109817, "grad_norm": 14.377496719360352, "learning_rate": 5e-06, "loss": 0.7126, "num_input_tokens_seen": 43759968, "step": 698 }, { "epoch": 2.32279534109817, "loss": 0.6350549459457397, "loss_ce": 0.0002893089840654284, "loss_iou": 0.21484375, "loss_num": 0.040771484375, "loss_xval": 0.6328125, "num_input_tokens_seen": 43759968, "step": 698 }, { "epoch": 2.3261231281198005, "grad_norm": 10.184778213500977, "learning_rate": 5e-06, "loss": 0.8103, "num_input_tokens_seen": 43823276, "step": 699 }, { "epoch": 2.3261231281198005, "loss": 0.973152220249176, "loss_ce": 0.0004960413789376616, "loss_iou": 0.306640625, "loss_num": 0.07177734375, "loss_xval": 0.97265625, "num_input_tokens_seen": 43823276, "step": 699 }, { "epoch": 2.329450915141431, "grad_norm": 10.185461044311523, "learning_rate": 5e-06, "loss": 0.9135, "num_input_tokens_seen": 43886252, "step": 700 }, { "epoch": 2.329450915141431, "loss": 0.7423095107078552, "loss_ce": 0.0008544151787646115, "loss_iou": 0.240234375, "loss_num": 0.05224609375, "loss_xval": 0.7421875, "num_input_tokens_seen": 43886252, "step": 700 }, { "epoch": 2.3327787021630617, "grad_norm": 14.283385276794434, "learning_rate": 5e-06, "loss": 0.8628, "num_input_tokens_seen": 43950104, "step": 701 }, { "epoch": 2.3327787021630617, "loss": 0.9078635573387146, "loss_ce": 2.6576359232421964e-05, "loss_iou": 0.283203125, "loss_num": 0.06787109375, "loss_xval": 0.90625, "num_input_tokens_seen": 43950104, "step": 701 }, { "epoch": 2.3361064891846923, "grad_norm": 17.961149215698242, "learning_rate": 5e-06, "loss": 0.8288, "num_input_tokens_seen": 44013476, "step": 702 }, { "epoch": 2.3361064891846923, "loss": 0.6979770660400391, "loss_ce": 0.0004673382209148258, "loss_iou": 0.20703125, "loss_num": 0.056640625, "loss_xval": 0.69921875, "num_input_tokens_seen": 44013476, "step": 702 }, { "epoch": 2.339434276206323, "grad_norm": 16.723167419433594, "learning_rate": 5e-06, "loss": 0.8697, "num_input_tokens_seen": 44078112, "step": 703 }, { "epoch": 2.339434276206323, "loss": 0.748289167881012, "loss_ce": 0.000974722090177238, "loss_iou": 0.283203125, "loss_num": 0.036376953125, "loss_xval": 0.74609375, "num_input_tokens_seen": 44078112, "step": 703 }, { "epoch": 2.3427620632279536, "grad_norm": 22.591569900512695, "learning_rate": 5e-06, "loss": 0.8053, "num_input_tokens_seen": 44140928, "step": 704 }, { "epoch": 2.3427620632279536, "loss": 0.524970293045044, "loss_ce": 0.000800370064098388, "loss_iou": 0.1708984375, "loss_num": 0.036376953125, "loss_xval": 0.5234375, "num_input_tokens_seen": 44140928, "step": 704 }, { "epoch": 2.346089850249584, "grad_norm": 49.59932327270508, "learning_rate": 5e-06, "loss": 1.0129, "num_input_tokens_seen": 44203212, "step": 705 }, { "epoch": 2.346089850249584, "loss": 0.6107718348503113, "loss_ce": 5.408008291851729e-05, "loss_iou": 0.185546875, "loss_num": 0.048095703125, "loss_xval": 0.609375, "num_input_tokens_seen": 44203212, "step": 705 }, { "epoch": 2.3494176372712148, "grad_norm": 12.25649642944336, "learning_rate": 5e-06, "loss": 1.0834, "num_input_tokens_seen": 44267888, "step": 706 }, { "epoch": 2.3494176372712148, "loss": 1.069253921508789, "loss_ce": 0.0007725384784862399, "loss_iou": 0.36328125, "loss_num": 0.068359375, "loss_xval": 1.0703125, "num_input_tokens_seen": 44267888, "step": 706 }, { "epoch": 2.3527454242928454, "grad_norm": 8.59157943725586, "learning_rate": 5e-06, "loss": 0.9064, "num_input_tokens_seen": 44331788, "step": 707 }, { "epoch": 2.3527454242928454, "loss": 0.8050553798675537, "loss_ce": 0.0013445127988234162, "loss_iou": 0.287109375, "loss_num": 0.0458984375, "loss_xval": 0.8046875, "num_input_tokens_seen": 44331788, "step": 707 }, { "epoch": 2.356073211314476, "grad_norm": 13.44419002532959, "learning_rate": 5e-06, "loss": 0.8174, "num_input_tokens_seen": 44394832, "step": 708 }, { "epoch": 2.356073211314476, "loss": 0.922235906124115, "loss_ce": 0.0008491869666613638, "loss_iou": 0.341796875, "loss_num": 0.0478515625, "loss_xval": 0.921875, "num_input_tokens_seen": 44394832, "step": 708 }, { "epoch": 2.3594009983361066, "grad_norm": 9.479907989501953, "learning_rate": 5e-06, "loss": 1.0606, "num_input_tokens_seen": 44459416, "step": 709 }, { "epoch": 2.3594009983361066, "loss": 0.9726129770278931, "loss_ce": 0.0014215761329978704, "loss_iou": 0.361328125, "loss_num": 0.05029296875, "loss_xval": 0.97265625, "num_input_tokens_seen": 44459416, "step": 709 }, { "epoch": 2.3627287853577372, "grad_norm": 7.282090187072754, "learning_rate": 5e-06, "loss": 0.5709, "num_input_tokens_seen": 44519432, "step": 710 }, { "epoch": 2.3627287853577372, "loss": 0.7231759428977966, "loss_ce": 0.0001534855255158618, "loss_iou": 0.1806640625, "loss_num": 0.072265625, "loss_xval": 0.72265625, "num_input_tokens_seen": 44519432, "step": 710 }, { "epoch": 2.366056572379368, "grad_norm": 10.938211441040039, "learning_rate": 5e-06, "loss": 0.9808, "num_input_tokens_seen": 44583072, "step": 711 }, { "epoch": 2.366056572379368, "loss": 0.8792444467544556, "loss_ce": 0.0010706019820645452, "loss_iou": 0.30078125, "loss_num": 0.0556640625, "loss_xval": 0.87890625, "num_input_tokens_seen": 44583072, "step": 711 }, { "epoch": 2.3693843594009985, "grad_norm": 15.567341804504395, "learning_rate": 5e-06, "loss": 0.9657, "num_input_tokens_seen": 44646108, "step": 712 }, { "epoch": 2.3693843594009985, "loss": 1.0545854568481445, "loss_ce": 0.0018511018715798855, "loss_iou": 0.390625, "loss_num": 0.05419921875, "loss_xval": 1.0546875, "num_input_tokens_seen": 44646108, "step": 712 }, { "epoch": 2.372712146422629, "grad_norm": 10.228503227233887, "learning_rate": 5e-06, "loss": 0.9219, "num_input_tokens_seen": 44708204, "step": 713 }, { "epoch": 2.372712146422629, "loss": 1.1827502250671387, "loss_ce": 0.0006212661974132061, "loss_iou": 0.40625, "loss_num": 0.07421875, "loss_xval": 1.1796875, "num_input_tokens_seen": 44708204, "step": 713 }, { "epoch": 2.3760399334442597, "grad_norm": 12.394549369812012, "learning_rate": 5e-06, "loss": 0.6104, "num_input_tokens_seen": 44769100, "step": 714 }, { "epoch": 2.3760399334442597, "loss": 0.5975984334945679, "loss_ce": 0.0037263473495841026, "loss_iou": 0.162109375, "loss_num": 0.053955078125, "loss_xval": 0.59375, "num_input_tokens_seen": 44769100, "step": 714 }, { "epoch": 2.3793677204658903, "grad_norm": 13.236189842224121, "learning_rate": 5e-06, "loss": 0.9356, "num_input_tokens_seen": 44832944, "step": 715 }, { "epoch": 2.3793677204658903, "loss": 0.889470100402832, "loss_ce": 0.003239629790186882, "loss_iou": 0.3125, "loss_num": 0.052490234375, "loss_xval": 0.88671875, "num_input_tokens_seen": 44832944, "step": 715 }, { "epoch": 2.382695507487521, "grad_norm": 15.01264762878418, "learning_rate": 5e-06, "loss": 0.937, "num_input_tokens_seen": 44894316, "step": 716 }, { "epoch": 2.382695507487521, "loss": 0.9371458292007446, "loss_ce": 0.0013547912240028381, "loss_iou": 0.2265625, "loss_num": 0.0966796875, "loss_xval": 0.9375, "num_input_tokens_seen": 44894316, "step": 716 }, { "epoch": 2.3860232945091515, "grad_norm": 40.445865631103516, "learning_rate": 5e-06, "loss": 1.1198, "num_input_tokens_seen": 44958448, "step": 717 }, { "epoch": 2.3860232945091515, "loss": 1.0740020275115967, "loss_ce": 0.00027151533868163824, "loss_iou": 0.38671875, "loss_num": 0.0595703125, "loss_xval": 1.0703125, "num_input_tokens_seen": 44958448, "step": 717 }, { "epoch": 2.389351081530782, "grad_norm": 21.32187271118164, "learning_rate": 5e-06, "loss": 0.8088, "num_input_tokens_seen": 45020540, "step": 718 }, { "epoch": 2.389351081530782, "loss": 0.8202319145202637, "loss_ce": 0.00028564067906700075, "loss_iou": 0.267578125, "loss_num": 0.056884765625, "loss_xval": 0.8203125, "num_input_tokens_seen": 45020540, "step": 718 }, { "epoch": 2.3926788685524127, "grad_norm": 11.086956024169922, "learning_rate": 5e-06, "loss": 0.7241, "num_input_tokens_seen": 45081188, "step": 719 }, { "epoch": 2.3926788685524127, "loss": 0.6227037906646729, "loss_ce": 0.00014521228149533272, "loss_iou": 0.171875, "loss_num": 0.0556640625, "loss_xval": 0.62109375, "num_input_tokens_seen": 45081188, "step": 719 }, { "epoch": 2.3960066555740434, "grad_norm": 15.942041397094727, "learning_rate": 5e-06, "loss": 0.86, "num_input_tokens_seen": 45142776, "step": 720 }, { "epoch": 2.3960066555740434, "loss": 0.6935033798217773, "loss_ce": 0.0011205670889467, "loss_iou": 0.255859375, "loss_num": 0.035888671875, "loss_xval": 0.69140625, "num_input_tokens_seen": 45142776, "step": 720 }, { "epoch": 2.399334442595674, "grad_norm": 13.693964004516602, "learning_rate": 5e-06, "loss": 1.0913, "num_input_tokens_seen": 45205824, "step": 721 }, { "epoch": 2.399334442595674, "loss": 1.0079128742218018, "loss_ce": 0.0008326807874254882, "loss_iou": 0.294921875, "loss_num": 0.08349609375, "loss_xval": 1.0078125, "num_input_tokens_seen": 45205824, "step": 721 }, { "epoch": 2.4026622296173046, "grad_norm": 15.026814460754395, "learning_rate": 5e-06, "loss": 0.7762, "num_input_tokens_seen": 45268764, "step": 722 }, { "epoch": 2.4026622296173046, "loss": 0.6626920700073242, "loss_ce": 9.44572821026668e-05, "loss_iou": 0.19140625, "loss_num": 0.0556640625, "loss_xval": 0.6640625, "num_input_tokens_seen": 45268764, "step": 722 }, { "epoch": 2.405990016638935, "grad_norm": 11.194849014282227, "learning_rate": 5e-06, "loss": 0.6871, "num_input_tokens_seen": 45331492, "step": 723 }, { "epoch": 2.405990016638935, "loss": 0.48289889097213745, "loss_ce": 0.000721134536433965, "loss_iou": 0.125, "loss_num": 0.046630859375, "loss_xval": 0.482421875, "num_input_tokens_seen": 45331492, "step": 723 }, { "epoch": 2.409317803660566, "grad_norm": 15.214365005493164, "learning_rate": 5e-06, "loss": 0.6964, "num_input_tokens_seen": 45394532, "step": 724 }, { "epoch": 2.409317803660566, "loss": 0.8721684217453003, "loss_ce": 0.000769482518080622, "loss_iou": 0.30859375, "loss_num": 0.050537109375, "loss_xval": 0.87109375, "num_input_tokens_seen": 45394532, "step": 724 }, { "epoch": 2.4126455906821964, "grad_norm": 18.638185501098633, "learning_rate": 5e-06, "loss": 0.757, "num_input_tokens_seen": 45457232, "step": 725 }, { "epoch": 2.4126455906821964, "loss": 0.6739429235458374, "loss_ce": 0.000358955207047984, "loss_iou": 0.244140625, "loss_num": 0.037109375, "loss_xval": 0.671875, "num_input_tokens_seen": 45457232, "step": 725 }, { "epoch": 2.415973377703827, "grad_norm": 18.782562255859375, "learning_rate": 5e-06, "loss": 0.8527, "num_input_tokens_seen": 45519960, "step": 726 }, { "epoch": 2.415973377703827, "loss": 0.7281821370124817, "loss_ce": 0.00113136216532439, "loss_iou": 0.28125, "loss_num": 0.033203125, "loss_xval": 0.7265625, "num_input_tokens_seen": 45519960, "step": 726 }, { "epoch": 2.4193011647254576, "grad_norm": 35.47547149658203, "learning_rate": 5e-06, "loss": 1.1733, "num_input_tokens_seen": 45583772, "step": 727 }, { "epoch": 2.4193011647254576, "loss": 1.2746448516845703, "loss_ce": 0.00023083810810931027, "loss_iou": 0.47265625, "loss_num": 0.06591796875, "loss_xval": 1.2734375, "num_input_tokens_seen": 45583772, "step": 727 }, { "epoch": 2.4226289517470883, "grad_norm": 30.0634708404541, "learning_rate": 5e-06, "loss": 0.9175, "num_input_tokens_seen": 45646088, "step": 728 }, { "epoch": 2.4226289517470883, "loss": 0.9010690450668335, "loss_ce": 0.016364434733986855, "loss_iou": 0.302734375, "loss_num": 0.055908203125, "loss_xval": 0.8828125, "num_input_tokens_seen": 45646088, "step": 728 }, { "epoch": 2.425956738768719, "grad_norm": 15.3126859664917, "learning_rate": 5e-06, "loss": 0.6904, "num_input_tokens_seen": 45708808, "step": 729 }, { "epoch": 2.425956738768719, "loss": 0.5067150592803955, "loss_ce": 0.0014660221058875322, "loss_iou": 0.109375, "loss_num": 0.057373046875, "loss_xval": 0.50390625, "num_input_tokens_seen": 45708808, "step": 729 }, { "epoch": 2.4292845257903495, "grad_norm": 19.397184371948242, "learning_rate": 5e-06, "loss": 0.8657, "num_input_tokens_seen": 45770068, "step": 730 }, { "epoch": 2.4292845257903495, "loss": 0.9862344264984131, "loss_ce": 0.001096484367735684, "loss_iou": 0.330078125, "loss_num": 0.0654296875, "loss_xval": 0.984375, "num_input_tokens_seen": 45770068, "step": 730 }, { "epoch": 2.43261231281198, "grad_norm": 7.857912063598633, "learning_rate": 5e-06, "loss": 0.6581, "num_input_tokens_seen": 45832556, "step": 731 }, { "epoch": 2.43261231281198, "loss": 0.5868287682533264, "loss_ce": 0.002844445873051882, "loss_iou": 0.173828125, "loss_num": 0.047119140625, "loss_xval": 0.5859375, "num_input_tokens_seen": 45832556, "step": 731 }, { "epoch": 2.4359400998336107, "grad_norm": 28.30218505859375, "learning_rate": 5e-06, "loss": 0.9152, "num_input_tokens_seen": 45894836, "step": 732 }, { "epoch": 2.4359400998336107, "loss": 1.060274362564087, "loss_ce": 0.0003377725079189986, "loss_iou": 0.3671875, "loss_num": 0.06494140625, "loss_xval": 1.0625, "num_input_tokens_seen": 45894836, "step": 732 }, { "epoch": 2.4392678868552413, "grad_norm": 17.082120895385742, "learning_rate": 5e-06, "loss": 0.698, "num_input_tokens_seen": 45957656, "step": 733 }, { "epoch": 2.4392678868552413, "loss": 0.6111936569213867, "loss_ce": 0.0005979241104796529, "loss_iou": 0.1962890625, "loss_num": 0.0439453125, "loss_xval": 0.609375, "num_input_tokens_seen": 45957656, "step": 733 }, { "epoch": 2.442595673876872, "grad_norm": 23.90694236755371, "learning_rate": 5e-06, "loss": 0.9885, "num_input_tokens_seen": 46021108, "step": 734 }, { "epoch": 2.442595673876872, "loss": 1.1393766403198242, "loss_ce": 0.0013150431914255023, "loss_iou": 0.322265625, "loss_num": 0.0986328125, "loss_xval": 1.140625, "num_input_tokens_seen": 46021108, "step": 734 }, { "epoch": 2.4459234608985025, "grad_norm": 11.997917175292969, "learning_rate": 5e-06, "loss": 0.8011, "num_input_tokens_seen": 46083808, "step": 735 }, { "epoch": 2.4459234608985025, "loss": 0.8153502345085144, "loss_ce": 0.003277485491707921, "loss_iou": 0.24609375, "loss_num": 0.06396484375, "loss_xval": 0.8125, "num_input_tokens_seen": 46083808, "step": 735 }, { "epoch": 2.449251247920133, "grad_norm": 27.563457489013672, "learning_rate": 5e-06, "loss": 0.8633, "num_input_tokens_seen": 46146540, "step": 736 }, { "epoch": 2.449251247920133, "loss": 0.8773728609085083, "loss_ce": 0.0001755441480781883, "loss_iou": 0.287109375, "loss_num": 0.060546875, "loss_xval": 0.87890625, "num_input_tokens_seen": 46146540, "step": 736 }, { "epoch": 2.4525790349417638, "grad_norm": 14.77198600769043, "learning_rate": 5e-06, "loss": 0.9703, "num_input_tokens_seen": 46209148, "step": 737 }, { "epoch": 2.4525790349417638, "loss": 0.757889986038208, "loss_ce": 0.0027630457188934088, "loss_iou": 0.2412109375, "loss_num": 0.0546875, "loss_xval": 0.75390625, "num_input_tokens_seen": 46209148, "step": 737 }, { "epoch": 2.4559068219633944, "grad_norm": 11.89362907409668, "learning_rate": 5e-06, "loss": 0.7569, "num_input_tokens_seen": 46272732, "step": 738 }, { "epoch": 2.4559068219633944, "loss": 0.7210886478424072, "loss_ce": 0.0008737589814700186, "loss_iou": 0.24609375, "loss_num": 0.04541015625, "loss_xval": 0.71875, "num_input_tokens_seen": 46272732, "step": 738 }, { "epoch": 2.459234608985025, "grad_norm": 4.648507118225098, "learning_rate": 5e-06, "loss": 0.619, "num_input_tokens_seen": 46336108, "step": 739 }, { "epoch": 2.459234608985025, "loss": 0.7744088768959045, "loss_ce": 0.00014738636673428118, "loss_iou": 0.25, "loss_num": 0.0546875, "loss_xval": 0.7734375, "num_input_tokens_seen": 46336108, "step": 739 }, { "epoch": 2.4625623960066556, "grad_norm": 21.0081844329834, "learning_rate": 5e-06, "loss": 0.7802, "num_input_tokens_seen": 46397180, "step": 740 }, { "epoch": 2.4625623960066556, "loss": 0.857671856880188, "loss_ce": 0.0025693103671073914, "loss_iou": 0.2734375, "loss_num": 0.061767578125, "loss_xval": 0.85546875, "num_input_tokens_seen": 46397180, "step": 740 }, { "epoch": 2.465890183028286, "grad_norm": 13.0715913772583, "learning_rate": 5e-06, "loss": 0.7849, "num_input_tokens_seen": 46459372, "step": 741 }, { "epoch": 2.465890183028286, "loss": 0.6565080881118774, "loss_ce": 0.006361585110425949, "loss_iou": 0.1689453125, "loss_num": 0.0625, "loss_xval": 0.6484375, "num_input_tokens_seen": 46459372, "step": 741 }, { "epoch": 2.469217970049917, "grad_norm": 9.451695442199707, "learning_rate": 5e-06, "loss": 0.991, "num_input_tokens_seen": 46523256, "step": 742 }, { "epoch": 2.469217970049917, "loss": 1.2594883441925049, "loss_ce": 0.00021096415002830327, "loss_iou": 0.44921875, "loss_num": 0.072265625, "loss_xval": 1.2578125, "num_input_tokens_seen": 46523256, "step": 742 }, { "epoch": 2.4725457570715474, "grad_norm": 13.714978218078613, "learning_rate": 5e-06, "loss": 0.9132, "num_input_tokens_seen": 46587516, "step": 743 }, { "epoch": 2.4725457570715474, "loss": 1.0107247829437256, "loss_ce": 0.0005928677855990827, "loss_iou": 0.3359375, "loss_num": 0.0673828125, "loss_xval": 1.0078125, "num_input_tokens_seen": 46587516, "step": 743 }, { "epoch": 2.475873544093178, "grad_norm": 12.65008544921875, "learning_rate": 5e-06, "loss": 1.0877, "num_input_tokens_seen": 46650720, "step": 744 }, { "epoch": 2.475873544093178, "loss": 1.1367709636688232, "loss_ce": 0.0029819714836776257, "loss_iou": 0.359375, "loss_num": 0.0830078125, "loss_xval": 1.1328125, "num_input_tokens_seen": 46650720, "step": 744 }, { "epoch": 2.4792013311148087, "grad_norm": 36.57686996459961, "learning_rate": 5e-06, "loss": 0.8593, "num_input_tokens_seen": 46715288, "step": 745 }, { "epoch": 2.4792013311148087, "loss": 0.6860495209693909, "loss_ce": 1.4360236491484102e-05, "loss_iou": 0.23046875, "loss_num": 0.045166015625, "loss_xval": 0.6875, "num_input_tokens_seen": 46715288, "step": 745 }, { "epoch": 2.4825291181364393, "grad_norm": 12.971878051757812, "learning_rate": 5e-06, "loss": 0.7946, "num_input_tokens_seen": 46777768, "step": 746 }, { "epoch": 2.4825291181364393, "loss": 0.78072589635849, "loss_ce": 0.0009407766629010439, "loss_iou": 0.212890625, "loss_num": 0.07080078125, "loss_xval": 0.78125, "num_input_tokens_seen": 46777768, "step": 746 }, { "epoch": 2.48585690515807, "grad_norm": 11.606049537658691, "learning_rate": 5e-06, "loss": 0.8739, "num_input_tokens_seen": 46839216, "step": 747 }, { "epoch": 2.48585690515807, "loss": 0.9141720533370972, "loss_ce": 0.0010860441252589226, "loss_iou": 0.3203125, "loss_num": 0.0546875, "loss_xval": 0.9140625, "num_input_tokens_seen": 46839216, "step": 747 }, { "epoch": 2.4891846921797005, "grad_norm": 17.032249450683594, "learning_rate": 5e-06, "loss": 0.5929, "num_input_tokens_seen": 46899036, "step": 748 }, { "epoch": 2.4891846921797005, "loss": 0.5191299319267273, "loss_ce": 8.698241435922682e-05, "loss_iou": 0.126953125, "loss_num": 0.052978515625, "loss_xval": 0.51953125, "num_input_tokens_seen": 46899036, "step": 748 }, { "epoch": 2.492512479201331, "grad_norm": 11.303759574890137, "learning_rate": 5e-06, "loss": 0.805, "num_input_tokens_seen": 46961588, "step": 749 }, { "epoch": 2.492512479201331, "loss": 0.8356009721755981, "loss_ce": 0.000151735614053905, "loss_iou": 0.2275390625, "loss_num": 0.076171875, "loss_xval": 0.8359375, "num_input_tokens_seen": 46961588, "step": 749 }, { "epoch": 2.4958402662229617, "grad_norm": 23.632003784179688, "learning_rate": 5e-06, "loss": 0.833, "num_input_tokens_seen": 47024696, "step": 750 }, { "epoch": 2.4958402662229617, "eval_seeclick_CIoU": 0.13491493463516235, "eval_seeclick_GIoU": 0.15883130580186844, "eval_seeclick_IoU": 0.24382416903972626, "eval_seeclick_MAE_all": 0.19065534323453903, "eval_seeclick_MAE_h": 0.05082565359771252, "eval_seeclick_MAE_w": 0.13937117159366608, "eval_seeclick_MAE_x_boxes": 0.30929259955883026, "eval_seeclick_MAE_y_boxes": 0.13166731595993042, "eval_seeclick_NUM_probability": 0.9999373257160187, "eval_seeclick_inside_bbox": 0.35208334028720856, "eval_seeclick_loss": 2.6928210258483887, "eval_seeclick_loss_ce": 0.08299023285508156, "eval_seeclick_loss_iou": 0.830810546875, "eval_seeclick_loss_num": 0.1858978271484375, "eval_seeclick_loss_xval": 2.59033203125, "eval_seeclick_runtime": 62.8164, "eval_seeclick_samples_per_second": 0.748, "eval_seeclick_steps_per_second": 0.032, "num_input_tokens_seen": 47024696, "step": 750 }, { "epoch": 2.4958402662229617, "eval_icons_CIoU": 0.05114769656211138, "eval_icons_GIoU": 0.15702077746391296, "eval_icons_IoU": 0.2036093920469284, "eval_icons_MAE_all": 0.12747248262166977, "eval_icons_MAE_h": 0.052048404701054096, "eval_icons_MAE_w": 0.1445429064333439, "eval_icons_MAE_x_boxes": 0.12461870163679123, "eval_icons_MAE_y_boxes": 0.046514895744621754, "eval_icons_NUM_probability": 0.9999918937683105, "eval_icons_inside_bbox": 0.3576388955116272, "eval_icons_loss": 2.3343775272369385, "eval_icons_loss_ce": 2.2014152136762277e-06, "eval_icons_loss_iou": 0.830078125, "eval_icons_loss_num": 0.1331043243408203, "eval_icons_loss_xval": 2.32666015625, "eval_icons_runtime": 65.6403, "eval_icons_samples_per_second": 0.762, "eval_icons_steps_per_second": 0.03, "num_input_tokens_seen": 47024696, "step": 750 }, { "epoch": 2.4958402662229617, "eval_screenspot_CIoU": 0.039610182866454124, "eval_screenspot_GIoU": 0.10459545751412709, "eval_screenspot_IoU": 0.1884987105925878, "eval_screenspot_MAE_all": 0.20243261754512787, "eval_screenspot_MAE_h": 0.0592109685142835, "eval_screenspot_MAE_w": 0.18089259415864944, "eval_screenspot_MAE_x_boxes": 0.250293031334877, "eval_screenspot_MAE_y_boxes": 0.1454415942231814, "eval_screenspot_NUM_probability": 0.9999621907869974, "eval_screenspot_inside_bbox": 0.3387500047683716, "eval_screenspot_loss": 2.80856990814209, "eval_screenspot_loss_ce": 0.00027161008135105175, "eval_screenspot_loss_iou": 0.9033203125, "eval_screenspot_loss_num": 0.20716349283854166, "eval_screenspot_loss_xval": 2.8421223958333335, "eval_screenspot_runtime": 126.77, "eval_screenspot_samples_per_second": 0.702, "eval_screenspot_steps_per_second": 0.024, "num_input_tokens_seen": 47024696, "step": 750 }, { "epoch": 2.4958402662229617, "eval_compot_CIoU": -0.054212167859077454, "eval_compot_GIoU": 0.057059221900999546, "eval_compot_IoU": 0.11786043643951416, "eval_compot_MAE_all": 0.2351270318031311, "eval_compot_MAE_h": 0.06904683262109756, "eval_compot_MAE_w": 0.2527950033545494, "eval_compot_MAE_x_boxes": 0.19278910011053085, "eval_compot_MAE_y_boxes": 0.18391906470060349, "eval_compot_NUM_probability": 0.9999668002128601, "eval_compot_inside_bbox": 0.2239583358168602, "eval_compot_loss": 2.9689693450927734, "eval_compot_loss_ce": 0.006361398845911026, "eval_compot_loss_iou": 0.924072265625, "eval_compot_loss_num": 0.2135009765625, "eval_compot_loss_xval": 2.916015625, "eval_compot_runtime": 72.4042, "eval_compot_samples_per_second": 0.691, "eval_compot_steps_per_second": 0.028, "num_input_tokens_seen": 47024696, "step": 750 }, { "epoch": 2.4958402662229617, "eval_custom_ui_MAE_all": 0.09127533435821533, "eval_custom_ui_MAE_x": 0.09527997672557831, "eval_custom_ui_MAE_y": 0.08727069199085236, "eval_custom_ui_NUM_probability": 0.999992311000824, "eval_custom_ui_loss": 0.4499455690383911, "eval_custom_ui_loss_ce": 8.726405303605134e-05, "eval_custom_ui_loss_num": 0.0854949951171875, "eval_custom_ui_loss_xval": 0.4273681640625, "eval_custom_ui_runtime": 56.9564, "eval_custom_ui_samples_per_second": 0.878, "eval_custom_ui_steps_per_second": 0.035, "num_input_tokens_seen": 47024696, "step": 750 }, { "epoch": 2.4958402662229617, "loss": 0.40762990713119507, "loss_ce": 0.00015917516429908574, "loss_iou": 0.0, "loss_num": 0.08154296875, "loss_xval": 0.408203125, "num_input_tokens_seen": 47024696, "step": 750 }, { "epoch": 2.4991680532445923, "grad_norm": 18.54668426513672, "learning_rate": 5e-06, "loss": 0.8896, "num_input_tokens_seen": 47088064, "step": 751 }, { "epoch": 2.4991680532445923, "loss": 0.5935692191123962, "loss_ce": 0.0016502785729244351, "loss_iou": 0.16796875, "loss_num": 0.05126953125, "loss_xval": 0.59375, "num_input_tokens_seen": 47088064, "step": 751 }, { "epoch": 2.502495840266223, "grad_norm": 11.817997932434082, "learning_rate": 5e-06, "loss": 0.8837, "num_input_tokens_seen": 47150452, "step": 752 }, { "epoch": 2.502495840266223, "loss": 0.8448632955551147, "loss_ce": 0.00044191486085765064, "loss_iou": 0.30859375, "loss_num": 0.045654296875, "loss_xval": 0.84375, "num_input_tokens_seen": 47150452, "step": 752 }, { "epoch": 2.5058236272878536, "grad_norm": 23.670198440551758, "learning_rate": 5e-06, "loss": 0.7463, "num_input_tokens_seen": 47212136, "step": 753 }, { "epoch": 2.5058236272878536, "loss": 0.8123268485069275, "loss_ce": 7.097484922269359e-05, "loss_iou": 0.28515625, "loss_num": 0.048583984375, "loss_xval": 0.8125, "num_input_tokens_seen": 47212136, "step": 753 }, { "epoch": 2.509151414309484, "grad_norm": 19.003517150878906, "learning_rate": 5e-06, "loss": 0.7155, "num_input_tokens_seen": 47275172, "step": 754 }, { "epoch": 2.509151414309484, "loss": 0.7601994276046753, "loss_ce": 0.0021427858155220747, "loss_iou": 0.255859375, "loss_num": 0.04931640625, "loss_xval": 0.7578125, "num_input_tokens_seen": 47275172, "step": 754 }, { "epoch": 2.512479201331115, "grad_norm": 42.69208908081055, "learning_rate": 5e-06, "loss": 0.8689, "num_input_tokens_seen": 47336960, "step": 755 }, { "epoch": 2.512479201331115, "loss": 1.1230835914611816, "loss_ce": 0.0015015102690085769, "loss_iou": 0.3828125, "loss_num": 0.0712890625, "loss_xval": 1.125, "num_input_tokens_seen": 47336960, "step": 755 }, { "epoch": 2.5158069883527454, "grad_norm": 18.97686004638672, "learning_rate": 5e-06, "loss": 0.9074, "num_input_tokens_seen": 47400968, "step": 756 }, { "epoch": 2.5158069883527454, "loss": 0.8372678756713867, "loss_ce": 0.000353793177055195, "loss_iou": 0.26171875, "loss_num": 0.06298828125, "loss_xval": 0.8359375, "num_input_tokens_seen": 47400968, "step": 756 }, { "epoch": 2.519134775374376, "grad_norm": 19.407001495361328, "learning_rate": 5e-06, "loss": 0.7905, "num_input_tokens_seen": 47462844, "step": 757 }, { "epoch": 2.519134775374376, "loss": 0.7666225433349609, "loss_ce": 0.0009975419379770756, "loss_iou": 0.171875, "loss_num": 0.08447265625, "loss_xval": 0.765625, "num_input_tokens_seen": 47462844, "step": 757 }, { "epoch": 2.5224625623960066, "grad_norm": 21.340974807739258, "learning_rate": 5e-06, "loss": 0.8336, "num_input_tokens_seen": 47524364, "step": 758 }, { "epoch": 2.5224625623960066, "loss": 1.158284068107605, "loss_ce": 0.0013016541488468647, "loss_iou": 0.361328125, "loss_num": 0.08740234375, "loss_xval": 1.15625, "num_input_tokens_seen": 47524364, "step": 758 }, { "epoch": 2.5257903494176372, "grad_norm": 12.445579528808594, "learning_rate": 5e-06, "loss": 0.7022, "num_input_tokens_seen": 47587348, "step": 759 }, { "epoch": 2.5257903494176372, "loss": 0.6259897947311401, "loss_ce": 0.0009897334966808558, "loss_iou": 0.2109375, "loss_num": 0.04052734375, "loss_xval": 0.625, "num_input_tokens_seen": 47587348, "step": 759 }, { "epoch": 2.529118136439268, "grad_norm": 11.766027450561523, "learning_rate": 5e-06, "loss": 0.9098, "num_input_tokens_seen": 47649600, "step": 760 }, { "epoch": 2.529118136439268, "loss": 0.6773974895477295, "loss_ce": 0.0006396822864189744, "loss_iou": 0.234375, "loss_num": 0.04150390625, "loss_xval": 0.67578125, "num_input_tokens_seen": 47649600, "step": 760 }, { "epoch": 2.5324459234608985, "grad_norm": 12.828213691711426, "learning_rate": 5e-06, "loss": 0.7839, "num_input_tokens_seen": 47712632, "step": 761 }, { "epoch": 2.5324459234608985, "loss": 0.7082505226135254, "loss_ce": 0.00012061389134032652, "loss_iou": 0.236328125, "loss_num": 0.046875, "loss_xval": 0.70703125, "num_input_tokens_seen": 47712632, "step": 761 }, { "epoch": 2.535773710482529, "grad_norm": 21.702556610107422, "learning_rate": 5e-06, "loss": 0.9561, "num_input_tokens_seen": 47776100, "step": 762 }, { "epoch": 2.535773710482529, "loss": 0.7422810792922974, "loss_ce": 9.361249976791441e-05, "loss_iou": 0.2373046875, "loss_num": 0.0537109375, "loss_xval": 0.7421875, "num_input_tokens_seen": 47776100, "step": 762 }, { "epoch": 2.5391014975041597, "grad_norm": 15.470916748046875, "learning_rate": 5e-06, "loss": 0.9802, "num_input_tokens_seen": 47837816, "step": 763 }, { "epoch": 2.5391014975041597, "loss": 1.192439317703247, "loss_ce": 0.000544731505215168, "loss_iou": 0.390625, "loss_num": 0.08251953125, "loss_xval": 1.1953125, "num_input_tokens_seen": 47837816, "step": 763 }, { "epoch": 2.5424292845257903, "grad_norm": 21.050203323364258, "learning_rate": 5e-06, "loss": 0.906, "num_input_tokens_seen": 47899704, "step": 764 }, { "epoch": 2.5424292845257903, "loss": 0.768609344959259, "loss_ce": 0.000665017869323492, "loss_iou": 0.29296875, "loss_num": 0.036865234375, "loss_xval": 0.76953125, "num_input_tokens_seen": 47899704, "step": 764 }, { "epoch": 2.545757071547421, "grad_norm": 10.072230339050293, "learning_rate": 5e-06, "loss": 0.5435, "num_input_tokens_seen": 47961524, "step": 765 }, { "epoch": 2.545757071547421, "loss": 0.4695216119289398, "loss_ce": 0.0011378447525203228, "loss_iou": 0.08349609375, "loss_num": 0.060302734375, "loss_xval": 0.46875, "num_input_tokens_seen": 47961524, "step": 765 }, { "epoch": 2.5490848585690515, "grad_norm": 7.9047136306762695, "learning_rate": 5e-06, "loss": 0.6468, "num_input_tokens_seen": 48020096, "step": 766 }, { "epoch": 2.5490848585690515, "loss": 0.7101080417633057, "loss_ce": 0.0008795225294306874, "loss_iou": 0.10986328125, "loss_num": 0.09814453125, "loss_xval": 0.7109375, "num_input_tokens_seen": 48020096, "step": 766 }, { "epoch": 2.552412645590682, "grad_norm": 9.703407287597656, "learning_rate": 5e-06, "loss": 0.5771, "num_input_tokens_seen": 48082036, "step": 767 }, { "epoch": 2.552412645590682, "loss": 0.5914462208747864, "loss_ce": 0.000625885440967977, "loss_iou": 0.2119140625, "loss_num": 0.033447265625, "loss_xval": 0.58984375, "num_input_tokens_seen": 48082036, "step": 767 }, { "epoch": 2.5557404326123128, "grad_norm": 22.55636215209961, "learning_rate": 5e-06, "loss": 1.1754, "num_input_tokens_seen": 48142276, "step": 768 }, { "epoch": 2.5557404326123128, "loss": 0.9645742774009705, "loss_ce": 0.00021879022824577987, "loss_iou": 0.298828125, "loss_num": 0.0732421875, "loss_xval": 0.96484375, "num_input_tokens_seen": 48142276, "step": 768 }, { "epoch": 2.5590682196339434, "grad_norm": 23.570049285888672, "learning_rate": 5e-06, "loss": 0.7371, "num_input_tokens_seen": 48204608, "step": 769 }, { "epoch": 2.5590682196339434, "loss": 0.6285191774368286, "loss_ce": 0.0007115625194273889, "loss_iou": 0.220703125, "loss_num": 0.037353515625, "loss_xval": 0.62890625, "num_input_tokens_seen": 48204608, "step": 769 }, { "epoch": 2.562396006655574, "grad_norm": 30.504138946533203, "learning_rate": 5e-06, "loss": 0.7577, "num_input_tokens_seen": 48268100, "step": 770 }, { "epoch": 2.562396006655574, "loss": 0.6577082872390747, "loss_ce": 0.00011549589544301853, "loss_iou": 0.1953125, "loss_num": 0.0537109375, "loss_xval": 0.65625, "num_input_tokens_seen": 48268100, "step": 770 }, { "epoch": 2.5657237936772046, "grad_norm": 25.44117546081543, "learning_rate": 5e-06, "loss": 0.9569, "num_input_tokens_seen": 48330448, "step": 771 }, { "epoch": 2.5657237936772046, "loss": 0.9606022834777832, "loss_ce": 0.0006413036026060581, "loss_iou": 0.318359375, "loss_num": 0.064453125, "loss_xval": 0.9609375, "num_input_tokens_seen": 48330448, "step": 771 }, { "epoch": 2.569051580698835, "grad_norm": 26.2667293548584, "learning_rate": 5e-06, "loss": 0.8804, "num_input_tokens_seen": 48393420, "step": 772 }, { "epoch": 2.569051580698835, "loss": 0.798115074634552, "loss_ce": 0.004169770982116461, "loss_iou": 0.291015625, "loss_num": 0.042724609375, "loss_xval": 0.79296875, "num_input_tokens_seen": 48393420, "step": 772 }, { "epoch": 2.572379367720466, "grad_norm": 11.151239395141602, "learning_rate": 5e-06, "loss": 0.6429, "num_input_tokens_seen": 48455548, "step": 773 }, { "epoch": 2.572379367720466, "loss": 0.5176081657409668, "loss_ce": 0.0030817792285233736, "loss_iou": 0.146484375, "loss_num": 0.04443359375, "loss_xval": 0.515625, "num_input_tokens_seen": 48455548, "step": 773 }, { "epoch": 2.5757071547420964, "grad_norm": 17.321378707885742, "learning_rate": 5e-06, "loss": 0.9045, "num_input_tokens_seen": 48519460, "step": 774 }, { "epoch": 2.5757071547420964, "loss": 1.2099051475524902, "loss_ce": 0.0021415799856185913, "loss_iou": 0.4296875, "loss_num": 0.0693359375, "loss_xval": 1.2109375, "num_input_tokens_seen": 48519460, "step": 774 }, { "epoch": 2.579034941763727, "grad_norm": 13.804886817932129, "learning_rate": 5e-06, "loss": 0.8553, "num_input_tokens_seen": 48581736, "step": 775 }, { "epoch": 2.579034941763727, "loss": 0.8312492966651917, "loss_ce": 0.00019460837938822806, "loss_iou": 0.279296875, "loss_num": 0.05419921875, "loss_xval": 0.83203125, "num_input_tokens_seen": 48581736, "step": 775 }, { "epoch": 2.5823627287853577, "grad_norm": 28.333999633789062, "learning_rate": 5e-06, "loss": 0.7878, "num_input_tokens_seen": 48645052, "step": 776 }, { "epoch": 2.5823627287853577, "loss": 0.7494910359382629, "loss_ce": 0.0012000187998637557, "loss_iou": 0.26171875, "loss_num": 0.044677734375, "loss_xval": 0.75, "num_input_tokens_seen": 48645052, "step": 776 }, { "epoch": 2.5856905158069883, "grad_norm": 13.613855361938477, "learning_rate": 5e-06, "loss": 0.8614, "num_input_tokens_seen": 48708828, "step": 777 }, { "epoch": 2.5856905158069883, "loss": 0.8165592551231384, "loss_ce": 0.0006412834627553821, "loss_iou": 0.314453125, "loss_num": 0.03759765625, "loss_xval": 0.81640625, "num_input_tokens_seen": 48708828, "step": 777 }, { "epoch": 2.589018302828619, "grad_norm": 19.142955780029297, "learning_rate": 5e-06, "loss": 0.829, "num_input_tokens_seen": 48771620, "step": 778 }, { "epoch": 2.589018302828619, "loss": 1.0290508270263672, "loss_ce": 0.0012187063694000244, "loss_iou": 0.35546875, "loss_num": 0.06396484375, "loss_xval": 1.03125, "num_input_tokens_seen": 48771620, "step": 778 }, { "epoch": 2.5923460898502495, "grad_norm": 18.934194564819336, "learning_rate": 5e-06, "loss": 0.8152, "num_input_tokens_seen": 48833540, "step": 779 }, { "epoch": 2.5923460898502495, "loss": 0.8391866087913513, "loss_ce": 0.001295918715186417, "loss_iou": 0.240234375, "loss_num": 0.0712890625, "loss_xval": 0.8359375, "num_input_tokens_seen": 48833540, "step": 779 }, { "epoch": 2.59567387687188, "grad_norm": 18.425006866455078, "learning_rate": 5e-06, "loss": 0.8615, "num_input_tokens_seen": 48896456, "step": 780 }, { "epoch": 2.59567387687188, "loss": 0.8006330132484436, "loss_ce": 0.00046212831512093544, "loss_iou": 0.265625, "loss_num": 0.0537109375, "loss_xval": 0.80078125, "num_input_tokens_seen": 48896456, "step": 780 }, { "epoch": 2.5990016638935107, "grad_norm": 18.229389190673828, "learning_rate": 5e-06, "loss": 0.7638, "num_input_tokens_seen": 48959876, "step": 781 }, { "epoch": 2.5990016638935107, "loss": 0.8060437440872192, "loss_ce": 0.00013553237658925354, "loss_iou": 0.26171875, "loss_num": 0.056640625, "loss_xval": 0.8046875, "num_input_tokens_seen": 48959876, "step": 781 }, { "epoch": 2.6023294509151413, "grad_norm": 10.035086631774902, "learning_rate": 5e-06, "loss": 0.5497, "num_input_tokens_seen": 49020532, "step": 782 }, { "epoch": 2.6023294509151413, "loss": 0.5629553198814392, "loss_ce": 8.909497410058975e-05, "loss_iou": 0.12451171875, "loss_num": 0.0625, "loss_xval": 0.5625, "num_input_tokens_seen": 49020532, "step": 782 }, { "epoch": 2.605657237936772, "grad_norm": 26.17829704284668, "learning_rate": 5e-06, "loss": 0.7714, "num_input_tokens_seen": 49082484, "step": 783 }, { "epoch": 2.605657237936772, "loss": 0.8501694202423096, "loss_ce": 0.0005600237054750323, "loss_iou": 0.318359375, "loss_num": 0.042724609375, "loss_xval": 0.8515625, "num_input_tokens_seen": 49082484, "step": 783 }, { "epoch": 2.6089850249584026, "grad_norm": 22.57184410095215, "learning_rate": 5e-06, "loss": 0.9777, "num_input_tokens_seen": 49145952, "step": 784 }, { "epoch": 2.6089850249584026, "loss": 0.7366130352020264, "loss_ce": 4.0785289456835017e-05, "loss_iou": 0.212890625, "loss_num": 0.062255859375, "loss_xval": 0.73828125, "num_input_tokens_seen": 49145952, "step": 784 }, { "epoch": 2.612312811980033, "grad_norm": 12.318331718444824, "learning_rate": 5e-06, "loss": 0.8321, "num_input_tokens_seen": 49206352, "step": 785 }, { "epoch": 2.612312811980033, "loss": 0.8576995134353638, "loss_ce": 0.004183888901025057, "loss_iou": 0.310546875, "loss_num": 0.04638671875, "loss_xval": 0.8515625, "num_input_tokens_seen": 49206352, "step": 785 }, { "epoch": 2.615640599001664, "grad_norm": 11.607172966003418, "learning_rate": 5e-06, "loss": 0.7514, "num_input_tokens_seen": 49268404, "step": 786 }, { "epoch": 2.615640599001664, "loss": 0.8183543086051941, "loss_ce": 0.0019480856135487556, "loss_iou": 0.28515625, "loss_num": 0.049072265625, "loss_xval": 0.81640625, "num_input_tokens_seen": 49268404, "step": 786 }, { "epoch": 2.6189683860232944, "grad_norm": 14.248104095458984, "learning_rate": 5e-06, "loss": 0.875, "num_input_tokens_seen": 49332036, "step": 787 }, { "epoch": 2.6189683860232944, "loss": 0.7565048336982727, "loss_ce": 0.0011337447213009, "loss_iou": 0.251953125, "loss_num": 0.050537109375, "loss_xval": 0.75390625, "num_input_tokens_seen": 49332036, "step": 787 }, { "epoch": 2.622296173044925, "grad_norm": 17.36334800720215, "learning_rate": 5e-06, "loss": 0.7014, "num_input_tokens_seen": 49394740, "step": 788 }, { "epoch": 2.622296173044925, "loss": 0.5740655064582825, "loss_ce": 0.0005791678559035063, "loss_iou": 0.185546875, "loss_num": 0.040283203125, "loss_xval": 0.57421875, "num_input_tokens_seen": 49394740, "step": 788 }, { "epoch": 2.6256239600665556, "grad_norm": 20.616777420043945, "learning_rate": 5e-06, "loss": 0.8368, "num_input_tokens_seen": 49456944, "step": 789 }, { "epoch": 2.6256239600665556, "loss": 0.8422057628631592, "loss_ce": 0.00028675797511823475, "loss_iou": 0.279296875, "loss_num": 0.056396484375, "loss_xval": 0.84375, "num_input_tokens_seen": 49456944, "step": 789 }, { "epoch": 2.6289517470881862, "grad_norm": 19.112476348876953, "learning_rate": 5e-06, "loss": 0.9446, "num_input_tokens_seen": 49520124, "step": 790 }, { "epoch": 2.6289517470881862, "loss": 0.8822909593582153, "loss_ce": 8.882155816536397e-05, "loss_iou": 0.3203125, "loss_num": 0.048095703125, "loss_xval": 0.8828125, "num_input_tokens_seen": 49520124, "step": 790 }, { "epoch": 2.632279534109817, "grad_norm": 11.602209091186523, "learning_rate": 5e-06, "loss": 0.9648, "num_input_tokens_seen": 49583060, "step": 791 }, { "epoch": 2.632279534109817, "loss": 0.8904417753219604, "loss_ce": 0.001037474605254829, "loss_iou": 0.291015625, "loss_num": 0.061279296875, "loss_xval": 0.890625, "num_input_tokens_seen": 49583060, "step": 791 }, { "epoch": 2.6356073211314475, "grad_norm": 19.73798179626465, "learning_rate": 5e-06, "loss": 0.9234, "num_input_tokens_seen": 49643248, "step": 792 }, { "epoch": 2.6356073211314475, "loss": 0.8780515193939209, "loss_ce": 0.00012182131467852741, "loss_iou": 0.296875, "loss_num": 0.056640625, "loss_xval": 0.87890625, "num_input_tokens_seen": 49643248, "step": 792 }, { "epoch": 2.638935108153078, "grad_norm": 31.99167251586914, "learning_rate": 5e-06, "loss": 1.2363, "num_input_tokens_seen": 49706308, "step": 793 }, { "epoch": 2.638935108153078, "loss": 1.3618634939193726, "loss_ce": 4.7138204536167905e-05, "loss_iou": 0.4609375, "loss_num": 0.08837890625, "loss_xval": 1.359375, "num_input_tokens_seen": 49706308, "step": 793 }, { "epoch": 2.6422628951747087, "grad_norm": 27.228073120117188, "learning_rate": 5e-06, "loss": 0.9539, "num_input_tokens_seen": 49769864, "step": 794 }, { "epoch": 2.6422628951747087, "loss": 1.108590006828308, "loss_ce": 0.00019164662808179855, "loss_iou": 0.37890625, "loss_num": 0.06982421875, "loss_xval": 1.109375, "num_input_tokens_seen": 49769864, "step": 794 }, { "epoch": 2.6455906821963393, "grad_norm": 21.955646514892578, "learning_rate": 5e-06, "loss": 1.0233, "num_input_tokens_seen": 49834096, "step": 795 }, { "epoch": 2.6455906821963393, "loss": 0.9163686037063599, "loss_ce": 0.00010885349911404774, "loss_iou": 0.291015625, "loss_num": 0.0673828125, "loss_xval": 0.91796875, "num_input_tokens_seen": 49834096, "step": 795 }, { "epoch": 2.64891846921797, "grad_norm": 17.316287994384766, "learning_rate": 5e-06, "loss": 0.6946, "num_input_tokens_seen": 49897404, "step": 796 }, { "epoch": 2.64891846921797, "loss": 0.49226897954940796, "loss_ce": 0.0015463390154764056, "loss_iou": 0.12060546875, "loss_num": 0.050048828125, "loss_xval": 0.490234375, "num_input_tokens_seen": 49897404, "step": 796 }, { "epoch": 2.6522462562396005, "grad_norm": 9.91407299041748, "learning_rate": 5e-06, "loss": 0.9557, "num_input_tokens_seen": 49960132, "step": 797 }, { "epoch": 2.6522462562396005, "loss": 1.2538728713989258, "loss_ce": 0.0002108162734657526, "loss_iou": 0.466796875, "loss_num": 0.0634765625, "loss_xval": 1.25, "num_input_tokens_seen": 49960132, "step": 797 }, { "epoch": 2.655574043261231, "grad_norm": 13.048734664916992, "learning_rate": 5e-06, "loss": 0.9346, "num_input_tokens_seen": 50021972, "step": 798 }, { "epoch": 2.655574043261231, "loss": 0.8867144584655762, "loss_ce": 0.00023978884564712644, "loss_iou": 0.267578125, "loss_num": 0.0703125, "loss_xval": 0.88671875, "num_input_tokens_seen": 50021972, "step": 798 }, { "epoch": 2.6589018302828618, "grad_norm": 20.037155151367188, "learning_rate": 5e-06, "loss": 1.0502, "num_input_tokens_seen": 50084912, "step": 799 }, { "epoch": 2.6589018302828618, "loss": 1.0424444675445557, "loss_ce": 0.00020819506607949734, "loss_iou": 0.365234375, "loss_num": 0.06201171875, "loss_xval": 1.0390625, "num_input_tokens_seen": 50084912, "step": 799 }, { "epoch": 2.6622296173044924, "grad_norm": 23.562681198120117, "learning_rate": 5e-06, "loss": 1.143, "num_input_tokens_seen": 50148712, "step": 800 }, { "epoch": 2.6622296173044924, "loss": 1.1583603620529175, "loss_ce": 0.001622087205760181, "loss_iou": 0.375, "loss_num": 0.0810546875, "loss_xval": 1.15625, "num_input_tokens_seen": 50148712, "step": 800 }, { "epoch": 2.665557404326123, "grad_norm": 12.925877571105957, "learning_rate": 5e-06, "loss": 0.807, "num_input_tokens_seen": 50210512, "step": 801 }, { "epoch": 2.665557404326123, "loss": 1.0540846586227417, "loss_ce": 0.0003736392827704549, "loss_iou": 0.390625, "loss_num": 0.054443359375, "loss_xval": 1.0546875, "num_input_tokens_seen": 50210512, "step": 801 }, { "epoch": 2.6688851913477536, "grad_norm": 11.798089027404785, "learning_rate": 5e-06, "loss": 0.9187, "num_input_tokens_seen": 50274604, "step": 802 }, { "epoch": 2.6688851913477536, "loss": 0.7706761956214905, "loss_ce": 0.00041252042865380645, "loss_iou": 0.26953125, "loss_num": 0.046630859375, "loss_xval": 0.76953125, "num_input_tokens_seen": 50274604, "step": 802 }, { "epoch": 2.672212978369384, "grad_norm": 9.151251792907715, "learning_rate": 5e-06, "loss": 0.5951, "num_input_tokens_seen": 50336456, "step": 803 }, { "epoch": 2.672212978369384, "loss": 0.6084632873535156, "loss_ce": 6.486372876679525e-05, "loss_iou": 0.212890625, "loss_num": 0.03662109375, "loss_xval": 0.609375, "num_input_tokens_seen": 50336456, "step": 803 }, { "epoch": 2.675540765391015, "grad_norm": 66.70578002929688, "learning_rate": 5e-06, "loss": 0.7686, "num_input_tokens_seen": 50400404, "step": 804 }, { "epoch": 2.675540765391015, "loss": 0.7586301565170288, "loss_ce": 8.528407488483936e-05, "loss_iou": 0.29296875, "loss_num": 0.034912109375, "loss_xval": 0.7578125, "num_input_tokens_seen": 50400404, "step": 804 }, { "epoch": 2.6788685524126454, "grad_norm": 14.901700019836426, "learning_rate": 5e-06, "loss": 0.8289, "num_input_tokens_seen": 50464096, "step": 805 }, { "epoch": 2.6788685524126454, "loss": 0.9449521899223328, "loss_ce": 0.0013486790703609586, "loss_iou": 0.359375, "loss_num": 0.04541015625, "loss_xval": 0.9453125, "num_input_tokens_seen": 50464096, "step": 805 }, { "epoch": 2.682196339434276, "grad_norm": 16.723764419555664, "learning_rate": 5e-06, "loss": 0.6458, "num_input_tokens_seen": 50526736, "step": 806 }, { "epoch": 2.682196339434276, "loss": 0.7537314891815186, "loss_ce": 0.0008018329390324652, "loss_iou": 0.279296875, "loss_num": 0.03857421875, "loss_xval": 0.75390625, "num_input_tokens_seen": 50526736, "step": 806 }, { "epoch": 2.6855241264559067, "grad_norm": 15.801563262939453, "learning_rate": 5e-06, "loss": 0.6904, "num_input_tokens_seen": 50589548, "step": 807 }, { "epoch": 2.6855241264559067, "loss": 0.5793936252593994, "loss_ce": 4.7907709813443944e-05, "loss_iou": 0.16015625, "loss_num": 0.052001953125, "loss_xval": 0.578125, "num_input_tokens_seen": 50589548, "step": 807 }, { "epoch": 2.6888519134775377, "grad_norm": 12.457715034484863, "learning_rate": 5e-06, "loss": 1.0248, "num_input_tokens_seen": 50650376, "step": 808 }, { "epoch": 2.6888519134775377, "loss": 1.0397467613220215, "loss_ce": 0.00019596036872826517, "loss_iou": 0.37890625, "loss_num": 0.05615234375, "loss_xval": 1.0390625, "num_input_tokens_seen": 50650376, "step": 808 }, { "epoch": 2.6921797004991683, "grad_norm": 23.163949966430664, "learning_rate": 5e-06, "loss": 0.6915, "num_input_tokens_seen": 50713052, "step": 809 }, { "epoch": 2.6921797004991683, "loss": 0.7694122791290283, "loss_ce": 0.00012515315029304475, "loss_iou": 0.28515625, "loss_num": 0.0400390625, "loss_xval": 0.76953125, "num_input_tokens_seen": 50713052, "step": 809 }, { "epoch": 2.695507487520799, "grad_norm": 18.702228546142578, "learning_rate": 5e-06, "loss": 0.9216, "num_input_tokens_seen": 50776796, "step": 810 }, { "epoch": 2.695507487520799, "loss": 0.9874943494796753, "loss_ce": 0.00018971107783727348, "loss_iou": 0.3671875, "loss_num": 0.051025390625, "loss_xval": 0.98828125, "num_input_tokens_seen": 50776796, "step": 810 }, { "epoch": 2.6988352745424296, "grad_norm": 9.986546516418457, "learning_rate": 5e-06, "loss": 0.9254, "num_input_tokens_seen": 50839964, "step": 811 }, { "epoch": 2.6988352745424296, "loss": 0.8390403985977173, "loss_ce": 5.112246435601264e-05, "loss_iou": 0.265625, "loss_num": 0.0615234375, "loss_xval": 0.83984375, "num_input_tokens_seen": 50839964, "step": 811 }, { "epoch": 2.70216306156406, "grad_norm": 15.212403297424316, "learning_rate": 5e-06, "loss": 0.8605, "num_input_tokens_seen": 50901932, "step": 812 }, { "epoch": 2.70216306156406, "loss": 0.7976114749908447, "loss_ce": 0.0004923460073769093, "loss_iou": 0.26953125, "loss_num": 0.0517578125, "loss_xval": 0.796875, "num_input_tokens_seen": 50901932, "step": 812 }, { "epoch": 2.7054908485856908, "grad_norm": 25.891193389892578, "learning_rate": 5e-06, "loss": 0.8721, "num_input_tokens_seen": 50964524, "step": 813 }, { "epoch": 2.7054908485856908, "loss": 0.8218099474906921, "loss_ce": 3.2617710530757904e-05, "loss_iou": 0.1953125, "loss_num": 0.08642578125, "loss_xval": 0.8203125, "num_input_tokens_seen": 50964524, "step": 813 }, { "epoch": 2.7088186356073214, "grad_norm": 15.289278984069824, "learning_rate": 5e-06, "loss": 0.7168, "num_input_tokens_seen": 51027040, "step": 814 }, { "epoch": 2.7088186356073214, "loss": 0.5973207950592041, "loss_ce": 0.0008241605246439576, "loss_iou": 0.193359375, "loss_num": 0.0419921875, "loss_xval": 0.59765625, "num_input_tokens_seen": 51027040, "step": 814 }, { "epoch": 2.712146422628952, "grad_norm": 12.39792537689209, "learning_rate": 5e-06, "loss": 0.803, "num_input_tokens_seen": 51089884, "step": 815 }, { "epoch": 2.712146422628952, "loss": 0.7842392325401306, "loss_ce": 0.0014480899553745985, "loss_iou": 0.2353515625, "loss_num": 0.0625, "loss_xval": 0.78125, "num_input_tokens_seen": 51089884, "step": 815 }, { "epoch": 2.7154742096505826, "grad_norm": 74.54549407958984, "learning_rate": 5e-06, "loss": 0.8014, "num_input_tokens_seen": 51152684, "step": 816 }, { "epoch": 2.7154742096505826, "loss": 0.8306292295455933, "loss_ce": 0.000184907199582085, "loss_iou": 0.279296875, "loss_num": 0.054443359375, "loss_xval": 0.83203125, "num_input_tokens_seen": 51152684, "step": 816 }, { "epoch": 2.7188019966722132, "grad_norm": 11.50522518157959, "learning_rate": 5e-06, "loss": 0.7829, "num_input_tokens_seen": 51214500, "step": 817 }, { "epoch": 2.7188019966722132, "loss": 0.881805419921875, "loss_ce": 0.00045781210064888, "loss_iou": 0.28515625, "loss_num": 0.06201171875, "loss_xval": 0.8828125, "num_input_tokens_seen": 51214500, "step": 817 }, { "epoch": 2.722129783693844, "grad_norm": 13.687999725341797, "learning_rate": 5e-06, "loss": 0.8651, "num_input_tokens_seen": 51276604, "step": 818 }, { "epoch": 2.722129783693844, "loss": 0.8142944574356079, "loss_ce": 0.0005737675237469375, "loss_iou": 0.28515625, "loss_num": 0.048828125, "loss_xval": 0.8125, "num_input_tokens_seen": 51276604, "step": 818 }, { "epoch": 2.7254575707154745, "grad_norm": 5.8822712898254395, "learning_rate": 5e-06, "loss": 0.5399, "num_input_tokens_seen": 51337268, "step": 819 }, { "epoch": 2.7254575707154745, "loss": 0.4156995415687561, "loss_ce": 0.00017218466382473707, "loss_iou": 0.1279296875, "loss_num": 0.03173828125, "loss_xval": 0.416015625, "num_input_tokens_seen": 51337268, "step": 819 }, { "epoch": 2.728785357737105, "grad_norm": 10.577842712402344, "learning_rate": 5e-06, "loss": 0.5331, "num_input_tokens_seen": 51398040, "step": 820 }, { "epoch": 2.728785357737105, "loss": 0.44522571563720703, "loss_ce": 0.0008897931547835469, "loss_iou": 0.10986328125, "loss_num": 0.044921875, "loss_xval": 0.4453125, "num_input_tokens_seen": 51398040, "step": 820 }, { "epoch": 2.7321131447587357, "grad_norm": 14.673174858093262, "learning_rate": 5e-06, "loss": 0.9165, "num_input_tokens_seen": 51459948, "step": 821 }, { "epoch": 2.7321131447587357, "loss": 1.0592856407165527, "loss_ce": 0.00020362591021694243, "loss_iou": 0.32421875, "loss_num": 0.08203125, "loss_xval": 1.0625, "num_input_tokens_seen": 51459948, "step": 821 }, { "epoch": 2.7354409317803663, "grad_norm": 10.287886619567871, "learning_rate": 5e-06, "loss": 0.8561, "num_input_tokens_seen": 51524140, "step": 822 }, { "epoch": 2.7354409317803663, "loss": 0.8826354742050171, "loss_ce": 0.0007995132473297417, "loss_iou": 0.2890625, "loss_num": 0.060302734375, "loss_xval": 0.8828125, "num_input_tokens_seen": 51524140, "step": 822 }, { "epoch": 2.738768718801997, "grad_norm": 9.983254432678223, "learning_rate": 5e-06, "loss": 0.823, "num_input_tokens_seen": 51587172, "step": 823 }, { "epoch": 2.738768718801997, "loss": 1.0295748710632324, "loss_ce": 3.3884258300531656e-05, "loss_iou": 0.3125, "loss_num": 0.08056640625, "loss_xval": 1.03125, "num_input_tokens_seen": 51587172, "step": 823 }, { "epoch": 2.7420965058236275, "grad_norm": 10.106569290161133, "learning_rate": 5e-06, "loss": 0.8517, "num_input_tokens_seen": 51649148, "step": 824 }, { "epoch": 2.7420965058236275, "loss": 0.7791653275489807, "loss_ce": 0.0006008880445733666, "loss_iou": 0.30859375, "loss_num": 0.031982421875, "loss_xval": 0.77734375, "num_input_tokens_seen": 51649148, "step": 824 }, { "epoch": 2.745424292845258, "grad_norm": 20.371871948242188, "learning_rate": 5e-06, "loss": 1.0496, "num_input_tokens_seen": 51712836, "step": 825 }, { "epoch": 2.745424292845258, "loss": 0.855181097984314, "loss_ce": 0.01924358680844307, "loss_iou": 0.294921875, "loss_num": 0.049560546875, "loss_xval": 0.8359375, "num_input_tokens_seen": 51712836, "step": 825 }, { "epoch": 2.7487520798668887, "grad_norm": 12.943856239318848, "learning_rate": 5e-06, "loss": 0.8533, "num_input_tokens_seen": 51775596, "step": 826 }, { "epoch": 2.7487520798668887, "loss": 0.914795994758606, "loss_ce": 0.0004892984288744628, "loss_iou": 0.318359375, "loss_num": 0.0556640625, "loss_xval": 0.9140625, "num_input_tokens_seen": 51775596, "step": 826 }, { "epoch": 2.7520798668885194, "grad_norm": 8.180317878723145, "learning_rate": 5e-06, "loss": 0.6992, "num_input_tokens_seen": 51838420, "step": 827 }, { "epoch": 2.7520798668885194, "loss": 0.7848867177963257, "loss_ce": 0.0020497734658420086, "loss_iou": 0.26953125, "loss_num": 0.049072265625, "loss_xval": 0.78125, "num_input_tokens_seen": 51838420, "step": 827 }, { "epoch": 2.75540765391015, "grad_norm": 15.182528495788574, "learning_rate": 5e-06, "loss": 0.8481, "num_input_tokens_seen": 51901276, "step": 828 }, { "epoch": 2.75540765391015, "loss": 0.7813282012939453, "loss_ce": 0.002519632689654827, "loss_iou": 0.21484375, "loss_num": 0.06982421875, "loss_xval": 0.77734375, "num_input_tokens_seen": 51901276, "step": 828 }, { "epoch": 2.7587354409317806, "grad_norm": 7.6279377937316895, "learning_rate": 5e-06, "loss": 0.6484, "num_input_tokens_seen": 51962060, "step": 829 }, { "epoch": 2.7587354409317806, "loss": 0.7650944590568542, "loss_ce": 0.00203291280195117, "loss_iou": 0.23046875, "loss_num": 0.06005859375, "loss_xval": 0.76171875, "num_input_tokens_seen": 51962060, "step": 829 }, { "epoch": 2.762063227953411, "grad_norm": 10.674771308898926, "learning_rate": 5e-06, "loss": 0.817, "num_input_tokens_seen": 52025592, "step": 830 }, { "epoch": 2.762063227953411, "loss": 0.7599319815635681, "loss_ce": 0.00016639150271657854, "loss_iou": 0.296875, "loss_num": 0.033203125, "loss_xval": 0.7578125, "num_input_tokens_seen": 52025592, "step": 830 }, { "epoch": 2.765391014975042, "grad_norm": 19.160572052001953, "learning_rate": 5e-06, "loss": 0.9251, "num_input_tokens_seen": 52088076, "step": 831 }, { "epoch": 2.765391014975042, "loss": 0.805517315864563, "loss_ce": 0.0005856447969563305, "loss_iou": 0.283203125, "loss_num": 0.047607421875, "loss_xval": 0.8046875, "num_input_tokens_seen": 52088076, "step": 831 }, { "epoch": 2.7687188019966724, "grad_norm": 6.1606125831604, "learning_rate": 5e-06, "loss": 0.5771, "num_input_tokens_seen": 52150536, "step": 832 }, { "epoch": 2.7687188019966724, "loss": 0.6053818464279175, "loss_ce": 0.0006455372204072773, "loss_iou": 0.1259765625, "loss_num": 0.0703125, "loss_xval": 0.60546875, "num_input_tokens_seen": 52150536, "step": 832 }, { "epoch": 2.772046589018303, "grad_norm": 22.167266845703125, "learning_rate": 5e-06, "loss": 0.9371, "num_input_tokens_seen": 52213100, "step": 833 }, { "epoch": 2.772046589018303, "loss": 0.9295365810394287, "loss_ce": 0.00021526089403778315, "loss_iou": 0.302734375, "loss_num": 0.0654296875, "loss_xval": 0.9296875, "num_input_tokens_seen": 52213100, "step": 833 }, { "epoch": 2.7753743760399336, "grad_norm": 10.496980667114258, "learning_rate": 5e-06, "loss": 0.6609, "num_input_tokens_seen": 52276464, "step": 834 }, { "epoch": 2.7753743760399336, "loss": 0.5460642576217651, "loss_ce": 0.00016583751130383462, "loss_iou": 0.2197265625, "loss_num": 0.021240234375, "loss_xval": 0.546875, "num_input_tokens_seen": 52276464, "step": 834 }, { "epoch": 2.7787021630615643, "grad_norm": 10.732155799865723, "learning_rate": 5e-06, "loss": 0.8073, "num_input_tokens_seen": 52340528, "step": 835 }, { "epoch": 2.7787021630615643, "loss": 0.6755124926567078, "loss_ce": 0.0002195164270233363, "loss_iou": 0.197265625, "loss_num": 0.055908203125, "loss_xval": 0.67578125, "num_input_tokens_seen": 52340528, "step": 835 }, { "epoch": 2.782029950083195, "grad_norm": 18.337326049804688, "learning_rate": 5e-06, "loss": 0.7644, "num_input_tokens_seen": 52403600, "step": 836 }, { "epoch": 2.782029950083195, "loss": 0.5846283435821533, "loss_ce": 0.0003998374450020492, "loss_iou": 0.1669921875, "loss_num": 0.0498046875, "loss_xval": 0.5859375, "num_input_tokens_seen": 52403600, "step": 836 }, { "epoch": 2.7853577371048255, "grad_norm": 68.70188903808594, "learning_rate": 5e-06, "loss": 0.884, "num_input_tokens_seen": 52466592, "step": 837 }, { "epoch": 2.7853577371048255, "loss": 1.1231420040130615, "loss_ce": 0.0011938156094402075, "loss_iou": 0.384765625, "loss_num": 0.0703125, "loss_xval": 1.125, "num_input_tokens_seen": 52466592, "step": 837 }, { "epoch": 2.788685524126456, "grad_norm": 35.47993850708008, "learning_rate": 5e-06, "loss": 0.9647, "num_input_tokens_seen": 52529452, "step": 838 }, { "epoch": 2.788685524126456, "loss": 0.9949101209640503, "loss_ce": 0.0007694980013184249, "loss_iou": 0.3203125, "loss_num": 0.07080078125, "loss_xval": 0.9921875, "num_input_tokens_seen": 52529452, "step": 838 }, { "epoch": 2.7920133111480867, "grad_norm": 21.51439094543457, "learning_rate": 5e-06, "loss": 0.8653, "num_input_tokens_seen": 52592540, "step": 839 }, { "epoch": 2.7920133111480867, "loss": 0.947022020816803, "loss_ce": 0.0014653451507911086, "loss_iou": 0.345703125, "loss_num": 0.05078125, "loss_xval": 0.9453125, "num_input_tokens_seen": 52592540, "step": 839 }, { "epoch": 2.7953410981697173, "grad_norm": 16.271461486816406, "learning_rate": 5e-06, "loss": 0.7983, "num_input_tokens_seen": 52654704, "step": 840 }, { "epoch": 2.7953410981697173, "loss": 0.980492353439331, "loss_ce": 0.0014884801348671317, "loss_iou": 0.28125, "loss_num": 0.08349609375, "loss_xval": 0.98046875, "num_input_tokens_seen": 52654704, "step": 840 }, { "epoch": 2.798668885191348, "grad_norm": 10.07279109954834, "learning_rate": 5e-06, "loss": 0.5959, "num_input_tokens_seen": 52716292, "step": 841 }, { "epoch": 2.798668885191348, "loss": 0.4731648564338684, "loss_ce": 0.000630666094366461, "loss_iou": 0.1494140625, "loss_num": 0.034912109375, "loss_xval": 0.47265625, "num_input_tokens_seen": 52716292, "step": 841 }, { "epoch": 2.8019966722129785, "grad_norm": 13.416818618774414, "learning_rate": 5e-06, "loss": 0.9959, "num_input_tokens_seen": 52780908, "step": 842 }, { "epoch": 2.8019966722129785, "loss": 1.2821091413497925, "loss_ce": 0.0006150341359898448, "loss_iou": 0.42578125, "loss_num": 0.0859375, "loss_xval": 1.28125, "num_input_tokens_seen": 52780908, "step": 842 }, { "epoch": 2.805324459234609, "grad_norm": 23.089929580688477, "learning_rate": 5e-06, "loss": 0.6385, "num_input_tokens_seen": 52842208, "step": 843 }, { "epoch": 2.805324459234609, "loss": 0.59052574634552, "loss_ce": 0.0006820209673605859, "loss_iou": 0.1962890625, "loss_num": 0.03955078125, "loss_xval": 0.58984375, "num_input_tokens_seen": 52842208, "step": 843 }, { "epoch": 2.8086522462562398, "grad_norm": 18.833816528320312, "learning_rate": 5e-06, "loss": 0.9746, "num_input_tokens_seen": 52904996, "step": 844 }, { "epoch": 2.8086522462562398, "loss": 1.0083526372909546, "loss_ce": 5.187302303966135e-05, "loss_iou": 0.388671875, "loss_num": 0.046142578125, "loss_xval": 1.0078125, "num_input_tokens_seen": 52904996, "step": 844 }, { "epoch": 2.8119800332778704, "grad_norm": 14.17634105682373, "learning_rate": 5e-06, "loss": 0.884, "num_input_tokens_seen": 52969668, "step": 845 }, { "epoch": 2.8119800332778704, "loss": 1.1289470195770264, "loss_ce": 0.0019939455669373274, "loss_iou": 0.439453125, "loss_num": 0.050048828125, "loss_xval": 1.125, "num_input_tokens_seen": 52969668, "step": 845 }, { "epoch": 2.815307820299501, "grad_norm": 23.1187801361084, "learning_rate": 5e-06, "loss": 0.7, "num_input_tokens_seen": 53031812, "step": 846 }, { "epoch": 2.815307820299501, "loss": 0.7373853921890259, "loss_ce": 8.065036672633141e-05, "loss_iou": 0.2470703125, "loss_num": 0.048583984375, "loss_xval": 0.73828125, "num_input_tokens_seen": 53031812, "step": 846 }, { "epoch": 2.8186356073211316, "grad_norm": 9.363740921020508, "learning_rate": 5e-06, "loss": 0.9656, "num_input_tokens_seen": 53095744, "step": 847 }, { "epoch": 2.8186356073211316, "loss": 1.1773841381072998, "loss_ce": 0.0018469768110662699, "loss_iou": 0.384765625, "loss_num": 0.08154296875, "loss_xval": 1.171875, "num_input_tokens_seen": 53095744, "step": 847 }, { "epoch": 2.821963394342762, "grad_norm": 11.104792594909668, "learning_rate": 5e-06, "loss": 0.8244, "num_input_tokens_seen": 53158744, "step": 848 }, { "epoch": 2.821963394342762, "loss": 0.9474537372589111, "loss_ce": 0.0006763727869838476, "loss_iou": 0.2578125, "loss_num": 0.08642578125, "loss_xval": 0.9453125, "num_input_tokens_seen": 53158744, "step": 848 }, { "epoch": 2.825291181364393, "grad_norm": 12.321479797363281, "learning_rate": 5e-06, "loss": 0.9694, "num_input_tokens_seen": 53221352, "step": 849 }, { "epoch": 2.825291181364393, "loss": 0.9421533346176147, "loss_ce": 0.0009911877568811178, "loss_iou": 0.26953125, "loss_num": 0.0810546875, "loss_xval": 0.94140625, "num_input_tokens_seen": 53221352, "step": 849 }, { "epoch": 2.8286189683860234, "grad_norm": 12.530994415283203, "learning_rate": 5e-06, "loss": 0.9866, "num_input_tokens_seen": 53285452, "step": 850 }, { "epoch": 2.8286189683860234, "loss": 1.0217738151550293, "loss_ce": 0.001021851203404367, "loss_iou": 0.375, "loss_num": 0.05419921875, "loss_xval": 1.0234375, "num_input_tokens_seen": 53285452, "step": 850 }, { "epoch": 2.831946755407654, "grad_norm": 15.440099716186523, "learning_rate": 5e-06, "loss": 0.8441, "num_input_tokens_seen": 53346748, "step": 851 }, { "epoch": 2.831946755407654, "loss": 0.7872898578643799, "loss_ce": 0.00018045374599751085, "loss_iou": 0.2197265625, "loss_num": 0.0693359375, "loss_xval": 0.7890625, "num_input_tokens_seen": 53346748, "step": 851 }, { "epoch": 2.8352745424292847, "grad_norm": 28.69334602355957, "learning_rate": 5e-06, "loss": 0.8573, "num_input_tokens_seen": 53409872, "step": 852 }, { "epoch": 2.8352745424292847, "loss": 0.8631025552749634, "loss_ce": 0.002262737834826112, "loss_iou": 0.26171875, "loss_num": 0.0673828125, "loss_xval": 0.859375, "num_input_tokens_seen": 53409872, "step": 852 }, { "epoch": 2.8386023294509153, "grad_norm": 12.272283554077148, "learning_rate": 5e-06, "loss": 0.8724, "num_input_tokens_seen": 53471764, "step": 853 }, { "epoch": 2.8386023294509153, "loss": 1.037917971611023, "loss_ce": 0.0020293924026191235, "loss_iou": 0.3125, "loss_num": 0.08203125, "loss_xval": 1.0390625, "num_input_tokens_seen": 53471764, "step": 853 }, { "epoch": 2.841930116472546, "grad_norm": 19.907730102539062, "learning_rate": 5e-06, "loss": 0.8915, "num_input_tokens_seen": 53535256, "step": 854 }, { "epoch": 2.841930116472546, "loss": 1.0508040189743042, "loss_ce": 0.0014875233173370361, "loss_iou": 0.40234375, "loss_num": 0.049560546875, "loss_xval": 1.046875, "num_input_tokens_seen": 53535256, "step": 854 }, { "epoch": 2.8452579034941765, "grad_norm": 16.519927978515625, "learning_rate": 5e-06, "loss": 0.8323, "num_input_tokens_seen": 53598600, "step": 855 }, { "epoch": 2.8452579034941765, "loss": 0.9220311045646667, "loss_ce": 0.0006444402388297021, "loss_iou": 0.306640625, "loss_num": 0.061279296875, "loss_xval": 0.921875, "num_input_tokens_seen": 53598600, "step": 855 }, { "epoch": 2.848585690515807, "grad_norm": 21.156627655029297, "learning_rate": 5e-06, "loss": 0.8297, "num_input_tokens_seen": 53660192, "step": 856 }, { "epoch": 2.848585690515807, "loss": 0.6860744953155518, "loss_ce": 3.937574365409091e-05, "loss_iou": 0.25390625, "loss_num": 0.03564453125, "loss_xval": 0.6875, "num_input_tokens_seen": 53660192, "step": 856 }, { "epoch": 2.8519134775374377, "grad_norm": 15.835363388061523, "learning_rate": 5e-06, "loss": 1.1557, "num_input_tokens_seen": 53723952, "step": 857 }, { "epoch": 2.8519134775374377, "loss": 1.1079440116882324, "loss_ce": 0.0005221446044743061, "loss_iou": 0.375, "loss_num": 0.0712890625, "loss_xval": 1.109375, "num_input_tokens_seen": 53723952, "step": 857 }, { "epoch": 2.8552412645590683, "grad_norm": 9.046875, "learning_rate": 5e-06, "loss": 0.8038, "num_input_tokens_seen": 53785616, "step": 858 }, { "epoch": 2.8552412645590683, "loss": 0.8684693574905396, "loss_ce": 0.0004273601807653904, "loss_iou": 0.326171875, "loss_num": 0.04345703125, "loss_xval": 0.8671875, "num_input_tokens_seen": 53785616, "step": 858 }, { "epoch": 2.858569051580699, "grad_norm": 46.66500473022461, "learning_rate": 5e-06, "loss": 1.2536, "num_input_tokens_seen": 53849740, "step": 859 }, { "epoch": 2.858569051580699, "loss": 1.047816276550293, "loss_ce": 0.0068005844950675964, "loss_iou": 0.3828125, "loss_num": 0.05517578125, "loss_xval": 1.0390625, "num_input_tokens_seen": 53849740, "step": 859 }, { "epoch": 2.8618968386023296, "grad_norm": 25.487953186035156, "learning_rate": 5e-06, "loss": 0.8454, "num_input_tokens_seen": 53911752, "step": 860 }, { "epoch": 2.8618968386023296, "loss": 0.7410845160484314, "loss_ce": 0.00017875817138701677, "loss_iou": 0.208984375, "loss_num": 0.06494140625, "loss_xval": 0.7421875, "num_input_tokens_seen": 53911752, "step": 860 }, { "epoch": 2.86522462562396, "grad_norm": 8.039831161499023, "learning_rate": 5e-06, "loss": 0.4574, "num_input_tokens_seen": 53973596, "step": 861 }, { "epoch": 2.86522462562396, "loss": 0.4054723381996155, "loss_ce": 0.00019890097610186785, "loss_iou": 0.107421875, "loss_num": 0.038330078125, "loss_xval": 0.40625, "num_input_tokens_seen": 53973596, "step": 861 }, { "epoch": 2.868552412645591, "grad_norm": 12.18427562713623, "learning_rate": 5e-06, "loss": 0.777, "num_input_tokens_seen": 54037804, "step": 862 }, { "epoch": 2.868552412645591, "loss": 0.7093939781188965, "loss_ce": 0.004559976048767567, "loss_iou": 0.251953125, "loss_num": 0.039794921875, "loss_xval": 0.703125, "num_input_tokens_seen": 54037804, "step": 862 }, { "epoch": 2.8718801996672214, "grad_norm": 11.70964527130127, "learning_rate": 5e-06, "loss": 0.8396, "num_input_tokens_seen": 54099912, "step": 863 }, { "epoch": 2.8718801996672214, "loss": 0.9924643635749817, "loss_ce": 0.0005210142116993666, "loss_iou": 0.302734375, "loss_num": 0.0771484375, "loss_xval": 0.9921875, "num_input_tokens_seen": 54099912, "step": 863 }, { "epoch": 2.875207986688852, "grad_norm": 12.803565979003906, "learning_rate": 5e-06, "loss": 0.7378, "num_input_tokens_seen": 54162264, "step": 864 }, { "epoch": 2.875207986688852, "loss": 0.7761240005493164, "loss_ce": 0.002198257017880678, "loss_iou": 0.2216796875, "loss_num": 0.06591796875, "loss_xval": 0.7734375, "num_input_tokens_seen": 54162264, "step": 864 }, { "epoch": 2.8785357737104826, "grad_norm": 11.305477142333984, "learning_rate": 5e-06, "loss": 0.7493, "num_input_tokens_seen": 54225144, "step": 865 }, { "epoch": 2.8785357737104826, "loss": 0.8462469577789307, "loss_ce": 0.0005437912186607718, "loss_iou": 0.2890625, "loss_num": 0.053466796875, "loss_xval": 0.84375, "num_input_tokens_seen": 54225144, "step": 865 }, { "epoch": 2.8818635607321132, "grad_norm": 12.183488845825195, "learning_rate": 5e-06, "loss": 1.0525, "num_input_tokens_seen": 54286592, "step": 866 }, { "epoch": 2.8818635607321132, "loss": 1.0839440822601318, "loss_ce": 0.0009362882701680064, "loss_iou": 0.3359375, "loss_num": 0.08203125, "loss_xval": 1.0859375, "num_input_tokens_seen": 54286592, "step": 866 }, { "epoch": 2.885191347753744, "grad_norm": 10.725017547607422, "learning_rate": 5e-06, "loss": 1.0308, "num_input_tokens_seen": 54348900, "step": 867 }, { "epoch": 2.885191347753744, "loss": 0.9614525437355042, "loss_ce": 2.6763778805616312e-05, "loss_iou": 0.328125, "loss_num": 0.06103515625, "loss_xval": 0.9609375, "num_input_tokens_seen": 54348900, "step": 867 }, { "epoch": 2.8885191347753745, "grad_norm": 12.298053741455078, "learning_rate": 5e-06, "loss": 0.7657, "num_input_tokens_seen": 54411716, "step": 868 }, { "epoch": 2.8885191347753745, "loss": 0.7437622547149658, "loss_ce": 0.0010864399373531342, "loss_iou": 0.2431640625, "loss_num": 0.051025390625, "loss_xval": 0.7421875, "num_input_tokens_seen": 54411716, "step": 868 }, { "epoch": 2.891846921797005, "grad_norm": 29.771516799926758, "learning_rate": 5e-06, "loss": 0.8699, "num_input_tokens_seen": 54474996, "step": 869 }, { "epoch": 2.891846921797005, "loss": 0.875381350517273, "loss_ce": 0.0006255026091821492, "loss_iou": 0.296875, "loss_num": 0.05615234375, "loss_xval": 0.875, "num_input_tokens_seen": 54474996, "step": 869 }, { "epoch": 2.8951747088186357, "grad_norm": 22.231077194213867, "learning_rate": 5e-06, "loss": 0.6807, "num_input_tokens_seen": 54538292, "step": 870 }, { "epoch": 2.8951747088186357, "loss": 0.6149954795837402, "loss_ce": 0.0007987120770849288, "loss_iou": 0.2265625, "loss_num": 0.0322265625, "loss_xval": 0.61328125, "num_input_tokens_seen": 54538292, "step": 870 }, { "epoch": 2.8985024958402663, "grad_norm": 13.072921752929688, "learning_rate": 5e-06, "loss": 0.9311, "num_input_tokens_seen": 54600360, "step": 871 }, { "epoch": 2.8985024958402663, "loss": 1.268268346786499, "loss_ce": 0.0002019708917941898, "loss_iou": 0.390625, "loss_num": 0.09716796875, "loss_xval": 1.265625, "num_input_tokens_seen": 54600360, "step": 871 }, { "epoch": 2.901830282861897, "grad_norm": 12.388557434082031, "learning_rate": 5e-06, "loss": 0.8199, "num_input_tokens_seen": 54662656, "step": 872 }, { "epoch": 2.901830282861897, "loss": 0.6191840767860413, "loss_ce": 4.344484841567464e-05, "loss_iou": 0.203125, "loss_num": 0.04248046875, "loss_xval": 0.6171875, "num_input_tokens_seen": 54662656, "step": 872 }, { "epoch": 2.9051580698835275, "grad_norm": 7.256121635437012, "learning_rate": 5e-06, "loss": 0.7057, "num_input_tokens_seen": 54723256, "step": 873 }, { "epoch": 2.9051580698835275, "loss": 0.5848498344421387, "loss_ce": 0.001109591918066144, "loss_iou": 0.1484375, "loss_num": 0.05712890625, "loss_xval": 0.58203125, "num_input_tokens_seen": 54723256, "step": 873 }, { "epoch": 2.908485856905158, "grad_norm": 30.038572311401367, "learning_rate": 5e-06, "loss": 1.0476, "num_input_tokens_seen": 54786872, "step": 874 }, { "epoch": 2.908485856905158, "loss": 0.7975156903266907, "loss_ce": 0.000274513236945495, "loss_iou": 0.24609375, "loss_num": 0.061279296875, "loss_xval": 0.796875, "num_input_tokens_seen": 54786872, "step": 874 }, { "epoch": 2.9118136439267888, "grad_norm": 27.706817626953125, "learning_rate": 5e-06, "loss": 0.8176, "num_input_tokens_seen": 54848424, "step": 875 }, { "epoch": 2.9118136439267888, "loss": 0.6453001499176025, "loss_ce": 0.0006468780338764191, "loss_iou": 0.1865234375, "loss_num": 0.05419921875, "loss_xval": 0.64453125, "num_input_tokens_seen": 54848424, "step": 875 }, { "epoch": 2.9151414309484194, "grad_norm": 16.334976196289062, "learning_rate": 5e-06, "loss": 0.828, "num_input_tokens_seen": 54910524, "step": 876 }, { "epoch": 2.9151414309484194, "loss": 0.9851783514022827, "loss_ce": 7.095631735865027e-05, "loss_iou": 0.310546875, "loss_num": 0.0732421875, "loss_xval": 0.984375, "num_input_tokens_seen": 54910524, "step": 876 }, { "epoch": 2.91846921797005, "grad_norm": 26.634248733520508, "learning_rate": 5e-06, "loss": 0.8181, "num_input_tokens_seen": 54973516, "step": 877 }, { "epoch": 2.91846921797005, "loss": 0.83323734998703, "loss_ce": 0.00035160701372660697, "loss_iou": 0.2890625, "loss_num": 0.05078125, "loss_xval": 0.83203125, "num_input_tokens_seen": 54973516, "step": 877 }, { "epoch": 2.9217970049916806, "grad_norm": 22.770780563354492, "learning_rate": 5e-06, "loss": 0.897, "num_input_tokens_seen": 55034692, "step": 878 }, { "epoch": 2.9217970049916806, "loss": 0.9181663990020752, "loss_ce": 0.00019761671137530357, "loss_iou": 0.328125, "loss_num": 0.05224609375, "loss_xval": 0.91796875, "num_input_tokens_seen": 55034692, "step": 878 }, { "epoch": 2.925124792013311, "grad_norm": 14.436359405517578, "learning_rate": 5e-06, "loss": 0.7651, "num_input_tokens_seen": 55096088, "step": 879 }, { "epoch": 2.925124792013311, "loss": 0.8270174860954285, "loss_ce": 0.00011315852316329256, "loss_iou": 0.255859375, "loss_num": 0.06298828125, "loss_xval": 0.828125, "num_input_tokens_seen": 55096088, "step": 879 }, { "epoch": 2.928452579034942, "grad_norm": 21.74774742126465, "learning_rate": 5e-06, "loss": 0.9191, "num_input_tokens_seen": 55159628, "step": 880 }, { "epoch": 2.928452579034942, "loss": 0.9564664363861084, "loss_ce": 0.0008694813586771488, "loss_iou": 0.3359375, "loss_num": 0.056396484375, "loss_xval": 0.95703125, "num_input_tokens_seen": 55159628, "step": 880 }, { "epoch": 2.9317803660565724, "grad_norm": 13.85302448272705, "learning_rate": 5e-06, "loss": 0.6639, "num_input_tokens_seen": 55222304, "step": 881 }, { "epoch": 2.9317803660565724, "loss": 0.6171444654464722, "loss_ce": 0.0015438641421496868, "loss_iou": 0.2060546875, "loss_num": 0.04052734375, "loss_xval": 0.6171875, "num_input_tokens_seen": 55222304, "step": 881 }, { "epoch": 2.935108153078203, "grad_norm": 15.573785781860352, "learning_rate": 5e-06, "loss": 0.9698, "num_input_tokens_seen": 55285600, "step": 882 }, { "epoch": 2.935108153078203, "loss": 1.1866347789764404, "loss_ce": 0.0001112837198888883, "loss_iou": 0.44140625, "loss_num": 0.060791015625, "loss_xval": 1.1875, "num_input_tokens_seen": 55285600, "step": 882 }, { "epoch": 2.9384359400998337, "grad_norm": 9.72053337097168, "learning_rate": 5e-06, "loss": 1.011, "num_input_tokens_seen": 55349256, "step": 883 }, { "epoch": 2.9384359400998337, "loss": 0.9649059176445007, "loss_ce": 0.0007946036057546735, "loss_iou": 0.3125, "loss_num": 0.06787109375, "loss_xval": 0.96484375, "num_input_tokens_seen": 55349256, "step": 883 }, { "epoch": 2.9417637271214643, "grad_norm": 40.047515869140625, "learning_rate": 5e-06, "loss": 0.9117, "num_input_tokens_seen": 55413708, "step": 884 }, { "epoch": 2.9417637271214643, "loss": 0.8685333132743835, "loss_ce": 0.0026885762345045805, "loss_iou": 0.28125, "loss_num": 0.060546875, "loss_xval": 0.8671875, "num_input_tokens_seen": 55413708, "step": 884 }, { "epoch": 2.945091514143095, "grad_norm": 29.746788024902344, "learning_rate": 5e-06, "loss": 1.1525, "num_input_tokens_seen": 55478204, "step": 885 }, { "epoch": 2.945091514143095, "loss": 1.2547807693481445, "loss_ce": 0.00624566525220871, "loss_iou": 0.453125, "loss_num": 0.06884765625, "loss_xval": 1.25, "num_input_tokens_seen": 55478204, "step": 885 }, { "epoch": 2.9484193011647255, "grad_norm": 14.427910804748535, "learning_rate": 5e-06, "loss": 0.8744, "num_input_tokens_seen": 55540140, "step": 886 }, { "epoch": 2.9484193011647255, "loss": 0.9295307993888855, "loss_ce": 0.0006977797020226717, "loss_iou": 0.302734375, "loss_num": 0.064453125, "loss_xval": 0.9296875, "num_input_tokens_seen": 55540140, "step": 886 }, { "epoch": 2.951747088186356, "grad_norm": 13.86884593963623, "learning_rate": 5e-06, "loss": 0.7577, "num_input_tokens_seen": 55602724, "step": 887 }, { "epoch": 2.951747088186356, "loss": 0.9324434399604797, "loss_ce": 0.0006807436002418399, "loss_iou": 0.314453125, "loss_num": 0.060546875, "loss_xval": 0.93359375, "num_input_tokens_seen": 55602724, "step": 887 }, { "epoch": 2.9550748752079867, "grad_norm": 14.230158805847168, "learning_rate": 5e-06, "loss": 1.1293, "num_input_tokens_seen": 55667092, "step": 888 }, { "epoch": 2.9550748752079867, "loss": 1.015046238899231, "loss_ce": 0.0013743427116423845, "loss_iou": 0.349609375, "loss_num": 0.06298828125, "loss_xval": 1.015625, "num_input_tokens_seen": 55667092, "step": 888 }, { "epoch": 2.9584026622296173, "grad_norm": 13.532299041748047, "learning_rate": 5e-06, "loss": 0.7976, "num_input_tokens_seen": 55729756, "step": 889 }, { "epoch": 2.9584026622296173, "loss": 0.7759445905685425, "loss_ce": 0.0006149307591840625, "loss_iou": 0.2734375, "loss_num": 0.04541015625, "loss_xval": 0.7734375, "num_input_tokens_seen": 55729756, "step": 889 }, { "epoch": 2.961730449251248, "grad_norm": 15.530501365661621, "learning_rate": 5e-06, "loss": 0.6693, "num_input_tokens_seen": 55792828, "step": 890 }, { "epoch": 2.961730449251248, "loss": 0.7313098907470703, "loss_ce": 0.0003528632805682719, "loss_iou": 0.2041015625, "loss_num": 0.06494140625, "loss_xval": 0.73046875, "num_input_tokens_seen": 55792828, "step": 890 }, { "epoch": 2.9650582362728786, "grad_norm": 15.952990531921387, "learning_rate": 5e-06, "loss": 0.8108, "num_input_tokens_seen": 55854952, "step": 891 }, { "epoch": 2.9650582362728786, "loss": 0.6857528686523438, "loss_ce": 0.0009384113945998251, "loss_iou": 0.23828125, "loss_num": 0.041748046875, "loss_xval": 0.68359375, "num_input_tokens_seen": 55854952, "step": 891 }, { "epoch": 2.968386023294509, "grad_norm": 7.85128116607666, "learning_rate": 5e-06, "loss": 0.8289, "num_input_tokens_seen": 55917956, "step": 892 }, { "epoch": 2.968386023294509, "loss": 0.668018102645874, "loss_ce": 4.93402112624608e-05, "loss_iou": 0.23046875, "loss_num": 0.04150390625, "loss_xval": 0.66796875, "num_input_tokens_seen": 55917956, "step": 892 }, { "epoch": 2.97171381031614, "grad_norm": 17.0181884765625, "learning_rate": 5e-06, "loss": 0.6734, "num_input_tokens_seen": 55979892, "step": 893 }, { "epoch": 2.97171381031614, "loss": 0.6943376064300537, "loss_ce": 0.0009782593697309494, "loss_iou": 0.201171875, "loss_num": 0.05810546875, "loss_xval": 0.6953125, "num_input_tokens_seen": 55979892, "step": 893 }, { "epoch": 2.9750415973377704, "grad_norm": 24.690635681152344, "learning_rate": 5e-06, "loss": 0.7435, "num_input_tokens_seen": 56042384, "step": 894 }, { "epoch": 2.9750415973377704, "loss": 0.47268909215927124, "loss_ce": 0.00015488307690247893, "loss_iou": 0.158203125, "loss_num": 0.031494140625, "loss_xval": 0.47265625, "num_input_tokens_seen": 56042384, "step": 894 }, { "epoch": 2.978369384359401, "grad_norm": 18.475412368774414, "learning_rate": 5e-06, "loss": 0.8394, "num_input_tokens_seen": 56104388, "step": 895 }, { "epoch": 2.978369384359401, "loss": 0.7747071981430054, "loss_ce": 4.900360363535583e-05, "loss_iou": 0.17578125, "loss_num": 0.0849609375, "loss_xval": 0.7734375, "num_input_tokens_seen": 56104388, "step": 895 }, { "epoch": 2.9816971713810316, "grad_norm": 10.097528457641602, "learning_rate": 5e-06, "loss": 0.5897, "num_input_tokens_seen": 56167044, "step": 896 }, { "epoch": 2.9816971713810316, "loss": 0.6344481706619263, "loss_ce": 0.0010939212515950203, "loss_iou": 0.1767578125, "loss_num": 0.055908203125, "loss_xval": 0.6328125, "num_input_tokens_seen": 56167044, "step": 896 }, { "epoch": 2.9850249584026622, "grad_norm": 15.803515434265137, "learning_rate": 5e-06, "loss": 0.623, "num_input_tokens_seen": 56229968, "step": 897 }, { "epoch": 2.9850249584026622, "loss": 0.4044834077358246, "loss_ce": 0.00033913765219040215, "loss_iou": 0.1103515625, "loss_num": 0.036865234375, "loss_xval": 0.404296875, "num_input_tokens_seen": 56229968, "step": 897 }, { "epoch": 2.988352745424293, "grad_norm": 10.865888595581055, "learning_rate": 5e-06, "loss": 0.706, "num_input_tokens_seen": 56293428, "step": 898 }, { "epoch": 2.988352745424293, "loss": 0.6543700695037842, "loss_ce": 1.2155430340499151e-05, "loss_iou": 0.2021484375, "loss_num": 0.05029296875, "loss_xval": 0.65625, "num_input_tokens_seen": 56293428, "step": 898 }, { "epoch": 2.9916805324459235, "grad_norm": 10.439217567443848, "learning_rate": 5e-06, "loss": 0.9566, "num_input_tokens_seen": 56353876, "step": 899 }, { "epoch": 2.9916805324459235, "loss": 0.7114546298980713, "loss_ce": 2.8855994969489984e-05, "loss_iou": 0.2451171875, "loss_num": 0.044189453125, "loss_xval": 0.7109375, "num_input_tokens_seen": 56353876, "step": 899 }, { "epoch": 2.995008319467554, "grad_norm": 31.885540008544922, "learning_rate": 5e-06, "loss": 1.0362, "num_input_tokens_seen": 56418016, "step": 900 }, { "epoch": 2.995008319467554, "loss": 1.0725525617599487, "loss_ce": 0.00028691417537629604, "loss_iou": 0.359375, "loss_num": 0.0712890625, "loss_xval": 1.0703125, "num_input_tokens_seen": 56418016, "step": 900 }, { "epoch": 2.9983361064891847, "grad_norm": 31.542757034301758, "learning_rate": 5e-06, "loss": 0.915, "num_input_tokens_seen": 56479904, "step": 901 }, { "epoch": 2.9983361064891847, "loss": 1.0275923013687134, "loss_ce": 0.0026899336371570826, "loss_iou": 0.35546875, "loss_num": 0.06298828125, "loss_xval": 1.0234375, "num_input_tokens_seen": 56479904, "step": 901 }, { "epoch": 2.9983361064891847, "loss": 0.8745320439338684, "loss_ce": 0.00026448973221704364, "loss_iou": 0.271484375, "loss_num": 0.06689453125, "loss_xval": 0.875, "num_input_tokens_seen": 56512420, "step": 901 }, { "epoch": 3.0016638935108153, "grad_norm": 20.686283111572266, "learning_rate": 5e-06, "loss": 0.9452, "num_input_tokens_seen": 56544136, "step": 902 }, { "epoch": 3.0016638935108153, "loss": 1.015965461730957, "loss_ce": 0.0008286432130262256, "loss_iou": 0.35546875, "loss_num": 0.060791015625, "loss_xval": 1.015625, "num_input_tokens_seen": 56544136, "step": 902 }, { "epoch": 3.004991680532446, "grad_norm": 30.531023025512695, "learning_rate": 5e-06, "loss": 0.9768, "num_input_tokens_seen": 56606324, "step": 903 }, { "epoch": 3.004991680532446, "loss": 0.9323800802230835, "loss_ce": 0.00012908241478726268, "loss_iou": 0.326171875, "loss_num": 0.05615234375, "loss_xval": 0.93359375, "num_input_tokens_seen": 56606324, "step": 903 }, { "epoch": 3.0083194675540765, "grad_norm": 13.145706176757812, "learning_rate": 5e-06, "loss": 0.7993, "num_input_tokens_seen": 56668452, "step": 904 }, { "epoch": 3.0083194675540765, "loss": 0.9373518228530884, "loss_ce": 9.595306619303301e-05, "loss_iou": 0.298828125, "loss_num": 0.06787109375, "loss_xval": 0.9375, "num_input_tokens_seen": 56668452, "step": 904 }, { "epoch": 3.011647254575707, "grad_norm": 157.36032104492188, "learning_rate": 5e-06, "loss": 0.6418, "num_input_tokens_seen": 56730152, "step": 905 }, { "epoch": 3.011647254575707, "loss": 0.5183830261230469, "loss_ce": 0.002696982817724347, "loss_iou": 0.0, "loss_num": 0.10302734375, "loss_xval": 0.515625, "num_input_tokens_seen": 56730152, "step": 905 }, { "epoch": 3.0149750415973378, "grad_norm": 36.10614013671875, "learning_rate": 5e-06, "loss": 0.7523, "num_input_tokens_seen": 56792164, "step": 906 }, { "epoch": 3.0149750415973378, "loss": 1.0320461988449097, "loss_ce": 6.377464160323143e-05, "loss_iou": 0.35546875, "loss_num": 0.0634765625, "loss_xval": 1.03125, "num_input_tokens_seen": 56792164, "step": 906 }, { "epoch": 3.0183028286189684, "grad_norm": 6.202398777008057, "learning_rate": 5e-06, "loss": 0.6772, "num_input_tokens_seen": 56852804, "step": 907 }, { "epoch": 3.0183028286189684, "loss": 0.6682400107383728, "loss_ce": 2.7143531042383984e-05, "loss_iou": 0.224609375, "loss_num": 0.043701171875, "loss_xval": 0.66796875, "num_input_tokens_seen": 56852804, "step": 907 }, { "epoch": 3.021630615640599, "grad_norm": 15.27850341796875, "learning_rate": 5e-06, "loss": 0.8055, "num_input_tokens_seen": 56915900, "step": 908 }, { "epoch": 3.021630615640599, "loss": 0.9380273222923279, "loss_ce": 3.903326069121249e-05, "loss_iou": 0.341796875, "loss_num": 0.05078125, "loss_xval": 0.9375, "num_input_tokens_seen": 56915900, "step": 908 }, { "epoch": 3.0249584026622296, "grad_norm": 24.6085147857666, "learning_rate": 5e-06, "loss": 1.1187, "num_input_tokens_seen": 56978488, "step": 909 }, { "epoch": 3.0249584026622296, "loss": 1.3314964771270752, "loss_ce": 0.002883219625800848, "loss_iou": 0.470703125, "loss_num": 0.0771484375, "loss_xval": 1.328125, "num_input_tokens_seen": 56978488, "step": 909 }, { "epoch": 3.02828618968386, "grad_norm": 21.691267013549805, "learning_rate": 5e-06, "loss": 0.7511, "num_input_tokens_seen": 57042312, "step": 910 }, { "epoch": 3.02828618968386, "loss": 0.7569243907928467, "loss_ce": 0.0004546991258393973, "loss_iou": 0.248046875, "loss_num": 0.05224609375, "loss_xval": 0.7578125, "num_input_tokens_seen": 57042312, "step": 910 }, { "epoch": 3.031613976705491, "grad_norm": 18.6723690032959, "learning_rate": 5e-06, "loss": 0.6642, "num_input_tokens_seen": 57103640, "step": 911 }, { "epoch": 3.031613976705491, "loss": 0.7280317544937134, "loss_ce": 0.00024860326084308326, "loss_iou": 0.216796875, "loss_num": 0.05859375, "loss_xval": 0.7265625, "num_input_tokens_seen": 57103640, "step": 911 }, { "epoch": 3.0349417637271214, "grad_norm": 16.631738662719727, "learning_rate": 5e-06, "loss": 0.6276, "num_input_tokens_seen": 57165720, "step": 912 }, { "epoch": 3.0349417637271214, "loss": 0.6402130126953125, "loss_ce": 0.0010528563288971782, "loss_iou": 0.21484375, "loss_num": 0.041748046875, "loss_xval": 0.640625, "num_input_tokens_seen": 57165720, "step": 912 }, { "epoch": 3.038269550748752, "grad_norm": 11.869771957397461, "learning_rate": 5e-06, "loss": 0.8904, "num_input_tokens_seen": 57228768, "step": 913 }, { "epoch": 3.038269550748752, "loss": 0.7842072248458862, "loss_ce": 2.7603602575254627e-05, "loss_iou": 0.2041015625, "loss_num": 0.07470703125, "loss_xval": 0.78515625, "num_input_tokens_seen": 57228768, "step": 913 }, { "epoch": 3.0415973377703827, "grad_norm": 16.256175994873047, "learning_rate": 5e-06, "loss": 0.7901, "num_input_tokens_seen": 57290928, "step": 914 }, { "epoch": 3.0415973377703827, "loss": 0.9927595257759094, "loss_ce": 8.371622243430465e-05, "loss_iou": 0.3359375, "loss_num": 0.0634765625, "loss_xval": 0.9921875, "num_input_tokens_seen": 57290928, "step": 914 }, { "epoch": 3.0449251247920133, "grad_norm": 24.751188278198242, "learning_rate": 5e-06, "loss": 0.6958, "num_input_tokens_seen": 57353872, "step": 915 }, { "epoch": 3.0449251247920133, "loss": 0.7315420508384705, "loss_ce": 0.00424710288643837, "loss_iou": 0.23046875, "loss_num": 0.052734375, "loss_xval": 0.7265625, "num_input_tokens_seen": 57353872, "step": 915 }, { "epoch": 3.048252911813644, "grad_norm": 16.81283187866211, "learning_rate": 5e-06, "loss": 1.0033, "num_input_tokens_seen": 57418248, "step": 916 }, { "epoch": 3.048252911813644, "loss": 0.8678412437438965, "loss_ce": 0.0008979164995253086, "loss_iou": 0.341796875, "loss_num": 0.037109375, "loss_xval": 0.8671875, "num_input_tokens_seen": 57418248, "step": 916 }, { "epoch": 3.0515806988352745, "grad_norm": 8.283062934875488, "learning_rate": 5e-06, "loss": 0.8209, "num_input_tokens_seen": 57481256, "step": 917 }, { "epoch": 3.0515806988352745, "loss": 0.8476381897926331, "loss_ce": 0.00047019918565638363, "loss_iou": 0.30078125, "loss_num": 0.049560546875, "loss_xval": 0.84765625, "num_input_tokens_seen": 57481256, "step": 917 }, { "epoch": 3.054908485856905, "grad_norm": 19.258445739746094, "learning_rate": 5e-06, "loss": 0.7102, "num_input_tokens_seen": 57543436, "step": 918 }, { "epoch": 3.054908485856905, "loss": 0.8542848825454712, "loss_ce": 0.0001589024905115366, "loss_iou": 0.2490234375, "loss_num": 0.0712890625, "loss_xval": 0.85546875, "num_input_tokens_seen": 57543436, "step": 918 }, { "epoch": 3.0582362728785357, "grad_norm": 24.21323013305664, "learning_rate": 5e-06, "loss": 0.7274, "num_input_tokens_seen": 57606988, "step": 919 }, { "epoch": 3.0582362728785357, "loss": 0.861693263053894, "loss_ce": 0.0008534241351298988, "loss_iou": 0.294921875, "loss_num": 0.053955078125, "loss_xval": 0.859375, "num_input_tokens_seen": 57606988, "step": 919 }, { "epoch": 3.0615640599001663, "grad_norm": 28.660486221313477, "learning_rate": 5e-06, "loss": 0.5461, "num_input_tokens_seen": 57668872, "step": 920 }, { "epoch": 3.0615640599001663, "loss": 0.6151924133300781, "loss_ce": 0.0004463810473680496, "loss_iou": 0.2119140625, "loss_num": 0.038330078125, "loss_xval": 0.61328125, "num_input_tokens_seen": 57668872, "step": 920 }, { "epoch": 3.064891846921797, "grad_norm": 10.633936882019043, "learning_rate": 5e-06, "loss": 0.9587, "num_input_tokens_seen": 57732472, "step": 921 }, { "epoch": 3.064891846921797, "loss": 1.0549280643463135, "loss_ce": 0.00048464565770700574, "loss_iou": 0.369140625, "loss_num": 0.06298828125, "loss_xval": 1.0546875, "num_input_tokens_seen": 57732472, "step": 921 }, { "epoch": 3.0682196339434276, "grad_norm": 26.227346420288086, "learning_rate": 5e-06, "loss": 0.966, "num_input_tokens_seen": 57796336, "step": 922 }, { "epoch": 3.0682196339434276, "loss": 0.8780733346939087, "loss_ce": 0.0028292066417634487, "loss_iou": 0.251953125, "loss_num": 0.07470703125, "loss_xval": 0.875, "num_input_tokens_seen": 57796336, "step": 922 }, { "epoch": 3.071547420965058, "grad_norm": 20.741628646850586, "learning_rate": 5e-06, "loss": 0.8608, "num_input_tokens_seen": 57861404, "step": 923 }, { "epoch": 3.071547420965058, "loss": 0.9321870803833008, "loss_ce": 0.00030230951961129904, "loss_iou": 0.341796875, "loss_num": 0.050048828125, "loss_xval": 0.93359375, "num_input_tokens_seen": 57861404, "step": 923 }, { "epoch": 3.074875207986689, "grad_norm": 10.478264808654785, "learning_rate": 5e-06, "loss": 0.598, "num_input_tokens_seen": 57923204, "step": 924 }, { "epoch": 3.074875207986689, "loss": 0.46035149693489075, "loss_ce": 8.537084795534611e-05, "loss_iou": 0.09716796875, "loss_num": 0.05322265625, "loss_xval": 0.4609375, "num_input_tokens_seen": 57923204, "step": 924 }, { "epoch": 3.0782029950083194, "grad_norm": 15.388328552246094, "learning_rate": 5e-06, "loss": 1.0425, "num_input_tokens_seen": 57985892, "step": 925 }, { "epoch": 3.0782029950083194, "loss": 1.1015738248825073, "loss_ce": 0.001964439172297716, "loss_iou": 0.33203125, "loss_num": 0.0869140625, "loss_xval": 1.1015625, "num_input_tokens_seen": 57985892, "step": 925 }, { "epoch": 3.08153078202995, "grad_norm": 10.75679874420166, "learning_rate": 5e-06, "loss": 0.796, "num_input_tokens_seen": 58049620, "step": 926 }, { "epoch": 3.08153078202995, "loss": 0.7979081273078918, "loss_ce": 5.657908695866354e-05, "loss_iou": 0.2265625, "loss_num": 0.06884765625, "loss_xval": 0.796875, "num_input_tokens_seen": 58049620, "step": 926 }, { "epoch": 3.0848585690515806, "grad_norm": 11.969529151916504, "learning_rate": 5e-06, "loss": 0.577, "num_input_tokens_seen": 58111152, "step": 927 }, { "epoch": 3.0848585690515806, "loss": 0.7319214344024658, "loss_ce": 0.00010991955059580505, "loss_iou": 0.248046875, "loss_num": 0.04736328125, "loss_xval": 0.73046875, "num_input_tokens_seen": 58111152, "step": 927 }, { "epoch": 3.0881863560732112, "grad_norm": 9.87148666381836, "learning_rate": 5e-06, "loss": 0.7442, "num_input_tokens_seen": 58175032, "step": 928 }, { "epoch": 3.0881863560732112, "loss": 0.6432963013648987, "loss_ce": 0.0014505886938422918, "loss_iou": 0.2216796875, "loss_num": 0.03955078125, "loss_xval": 0.640625, "num_input_tokens_seen": 58175032, "step": 928 }, { "epoch": 3.091514143094842, "grad_norm": 52.29267501831055, "learning_rate": 5e-06, "loss": 0.9138, "num_input_tokens_seen": 58237164, "step": 929 }, { "epoch": 3.091514143094842, "loss": 0.9514411091804504, "loss_ce": 0.0002692709385883063, "loss_iou": 0.3359375, "loss_num": 0.055419921875, "loss_xval": 0.953125, "num_input_tokens_seen": 58237164, "step": 929 }, { "epoch": 3.0948419301164725, "grad_norm": 12.875478744506836, "learning_rate": 5e-06, "loss": 0.9315, "num_input_tokens_seen": 58300844, "step": 930 }, { "epoch": 3.0948419301164725, "loss": 0.9565946459770203, "loss_ce": 0.0007841241895221174, "loss_iou": 0.33203125, "loss_num": 0.058349609375, "loss_xval": 0.95703125, "num_input_tokens_seen": 58300844, "step": 930 }, { "epoch": 3.098169717138103, "grad_norm": 22.145496368408203, "learning_rate": 5e-06, "loss": 0.7771, "num_input_tokens_seen": 58365028, "step": 931 }, { "epoch": 3.098169717138103, "loss": 0.6949498653411865, "loss_ce": 0.00036975587136112154, "loss_iou": 0.259765625, "loss_num": 0.034912109375, "loss_xval": 0.6953125, "num_input_tokens_seen": 58365028, "step": 931 }, { "epoch": 3.1014975041597337, "grad_norm": 16.330707550048828, "learning_rate": 5e-06, "loss": 0.7964, "num_input_tokens_seen": 58426292, "step": 932 }, { "epoch": 3.1014975041597337, "loss": 0.9439092874526978, "loss_ce": 0.0003057535504922271, "loss_iou": 0.25390625, "loss_num": 0.0869140625, "loss_xval": 0.9453125, "num_input_tokens_seen": 58426292, "step": 932 }, { "epoch": 3.1048252911813643, "grad_norm": 10.452964782714844, "learning_rate": 5e-06, "loss": 0.7588, "num_input_tokens_seen": 58488240, "step": 933 }, { "epoch": 3.1048252911813643, "loss": 0.8706070780754089, "loss_ce": 0.001954704290255904, "loss_iou": 0.296875, "loss_num": 0.0546875, "loss_xval": 0.8671875, "num_input_tokens_seen": 58488240, "step": 933 }, { "epoch": 3.108153078202995, "grad_norm": 18.522306442260742, "learning_rate": 5e-06, "loss": 0.9628, "num_input_tokens_seen": 58551624, "step": 934 }, { "epoch": 3.108153078202995, "loss": 0.8793235421180725, "loss_ce": 0.0013938324991613626, "loss_iou": 0.345703125, "loss_num": 0.037841796875, "loss_xval": 0.87890625, "num_input_tokens_seen": 58551624, "step": 934 }, { "epoch": 3.1114808652246255, "grad_norm": 7.724632740020752, "learning_rate": 5e-06, "loss": 0.778, "num_input_tokens_seen": 58613036, "step": 935 }, { "epoch": 3.1114808652246255, "loss": 1.0509037971496582, "loss_ce": 0.007446764037013054, "loss_iou": 0.26171875, "loss_num": 0.10400390625, "loss_xval": 1.046875, "num_input_tokens_seen": 58613036, "step": 935 }, { "epoch": 3.114808652246256, "grad_norm": 12.31716537475586, "learning_rate": 5e-06, "loss": 0.9438, "num_input_tokens_seen": 58675872, "step": 936 }, { "epoch": 3.114808652246256, "loss": 0.9533208608627319, "loss_ce": 0.0005620683077722788, "loss_iou": 0.30859375, "loss_num": 0.06689453125, "loss_xval": 0.953125, "num_input_tokens_seen": 58675872, "step": 936 }, { "epoch": 3.1181364392678868, "grad_norm": 14.161133766174316, "learning_rate": 5e-06, "loss": 0.8589, "num_input_tokens_seen": 58738884, "step": 937 }, { "epoch": 3.1181364392678868, "loss": 0.8286876678466797, "loss_ce": 0.0001964616822078824, "loss_iou": 0.310546875, "loss_num": 0.041259765625, "loss_xval": 0.828125, "num_input_tokens_seen": 58738884, "step": 937 }, { "epoch": 3.1214642262895174, "grad_norm": 17.004718780517578, "learning_rate": 5e-06, "loss": 0.7602, "num_input_tokens_seen": 58802072, "step": 938 }, { "epoch": 3.1214642262895174, "loss": 0.821278989315033, "loss_ce": 0.0004782435134984553, "loss_iou": 0.298828125, "loss_num": 0.044921875, "loss_xval": 0.8203125, "num_input_tokens_seen": 58802072, "step": 938 }, { "epoch": 3.124792013311148, "grad_norm": 12.556039810180664, "learning_rate": 5e-06, "loss": 0.8359, "num_input_tokens_seen": 58864708, "step": 939 }, { "epoch": 3.124792013311148, "loss": 0.8781052827835083, "loss_ce": 0.0005417764186859131, "loss_iou": 0.298828125, "loss_num": 0.055908203125, "loss_xval": 0.87890625, "num_input_tokens_seen": 58864708, "step": 939 }, { "epoch": 3.1281198003327786, "grad_norm": 14.815278053283691, "learning_rate": 5e-06, "loss": 0.8864, "num_input_tokens_seen": 58926916, "step": 940 }, { "epoch": 3.1281198003327786, "loss": 0.9597880840301514, "loss_ce": 0.0008037795196287334, "loss_iou": 0.357421875, "loss_num": 0.048828125, "loss_xval": 0.9609375, "num_input_tokens_seen": 58926916, "step": 940 }, { "epoch": 3.131447587354409, "grad_norm": 17.76370620727539, "learning_rate": 5e-06, "loss": 0.84, "num_input_tokens_seen": 58988668, "step": 941 }, { "epoch": 3.131447587354409, "loss": 0.88735431432724, "loss_ce": 0.00042194739216938615, "loss_iou": 0.32421875, "loss_num": 0.048095703125, "loss_xval": 0.88671875, "num_input_tokens_seen": 58988668, "step": 941 }, { "epoch": 3.13477537437604, "grad_norm": 36.662315368652344, "learning_rate": 5e-06, "loss": 0.8738, "num_input_tokens_seen": 59052520, "step": 942 }, { "epoch": 3.13477537437604, "loss": 0.8464229106903076, "loss_ce": 0.0009028796339407563, "loss_iou": 0.322265625, "loss_num": 0.0400390625, "loss_xval": 0.84375, "num_input_tokens_seen": 59052520, "step": 942 }, { "epoch": 3.1381031613976704, "grad_norm": 8.20991039276123, "learning_rate": 5e-06, "loss": 0.5881, "num_input_tokens_seen": 59114480, "step": 943 }, { "epoch": 3.1381031613976704, "loss": 0.5021640658378601, "loss_ce": 8.887949661584571e-05, "loss_iou": 0.1484375, "loss_num": 0.041259765625, "loss_xval": 0.50390625, "num_input_tokens_seen": 59114480, "step": 943 }, { "epoch": 3.141430948419301, "grad_norm": 18.209897994995117, "learning_rate": 5e-06, "loss": 1.0336, "num_input_tokens_seen": 59175212, "step": 944 }, { "epoch": 3.141430948419301, "loss": 1.2060582637786865, "loss_ce": 0.002444992307573557, "loss_iou": 0.40234375, "loss_num": 0.080078125, "loss_xval": 1.203125, "num_input_tokens_seen": 59175212, "step": 944 }, { "epoch": 3.1447587354409317, "grad_norm": 19.541431427001953, "learning_rate": 5e-06, "loss": 0.9663, "num_input_tokens_seen": 59239460, "step": 945 }, { "epoch": 3.1447587354409317, "loss": 0.7803661823272705, "loss_ce": 9.27629807847552e-05, "loss_iou": 0.265625, "loss_num": 0.0498046875, "loss_xval": 0.78125, "num_input_tokens_seen": 59239460, "step": 945 }, { "epoch": 3.1480865224625623, "grad_norm": 10.669411659240723, "learning_rate": 5e-06, "loss": 1.0049, "num_input_tokens_seen": 59301360, "step": 946 }, { "epoch": 3.1480865224625623, "loss": 1.1545720100402832, "loss_ce": 3.09227907564491e-05, "loss_iou": 0.35546875, "loss_num": 0.0888671875, "loss_xval": 1.15625, "num_input_tokens_seen": 59301360, "step": 946 }, { "epoch": 3.151414309484193, "grad_norm": 12.653473854064941, "learning_rate": 5e-06, "loss": 0.7067, "num_input_tokens_seen": 59365016, "step": 947 }, { "epoch": 3.151414309484193, "loss": 0.7654366493225098, "loss_ce": 0.0005440429667942226, "loss_iou": 0.2734375, "loss_num": 0.0439453125, "loss_xval": 0.765625, "num_input_tokens_seen": 59365016, "step": 947 }, { "epoch": 3.1547420965058235, "grad_norm": 11.480525016784668, "learning_rate": 5e-06, "loss": 0.6465, "num_input_tokens_seen": 59427212, "step": 948 }, { "epoch": 3.1547420965058235, "loss": 0.8721858263015747, "loss_ce": 0.0003596378955990076, "loss_iou": 0.326171875, "loss_num": 0.04345703125, "loss_xval": 0.87109375, "num_input_tokens_seen": 59427212, "step": 948 }, { "epoch": 3.158069883527454, "grad_norm": 34.165103912353516, "learning_rate": 5e-06, "loss": 0.9143, "num_input_tokens_seen": 59491056, "step": 949 }, { "epoch": 3.158069883527454, "loss": 0.9038797616958618, "loss_ce": 7.110174919944257e-05, "loss_iou": 0.30859375, "loss_num": 0.057861328125, "loss_xval": 0.90234375, "num_input_tokens_seen": 59491056, "step": 949 }, { "epoch": 3.1613976705490847, "grad_norm": 24.460308074951172, "learning_rate": 5e-06, "loss": 0.827, "num_input_tokens_seen": 59552212, "step": 950 }, { "epoch": 3.1613976705490847, "loss": 0.9148125052452087, "loss_ce": 1.7638567442190833e-05, "loss_iou": 0.291015625, "loss_num": 0.06689453125, "loss_xval": 0.9140625, "num_input_tokens_seen": 59552212, "step": 950 }, { "epoch": 3.1647254575707153, "grad_norm": 16.602678298950195, "learning_rate": 5e-06, "loss": 0.7661, "num_input_tokens_seen": 59615180, "step": 951 }, { "epoch": 3.1647254575707153, "loss": 0.9075579047203064, "loss_ce": 0.0015520007582381368, "loss_iou": 0.25, "loss_num": 0.0810546875, "loss_xval": 0.90625, "num_input_tokens_seen": 59615180, "step": 951 }, { "epoch": 3.168053244592346, "grad_norm": 14.823358535766602, "learning_rate": 5e-06, "loss": 0.69, "num_input_tokens_seen": 59678104, "step": 952 }, { "epoch": 3.168053244592346, "loss": 0.7148585319519043, "loss_ce": 1.4785388884774875e-05, "loss_iou": 0.26171875, "loss_num": 0.038330078125, "loss_xval": 0.71484375, "num_input_tokens_seen": 59678104, "step": 952 }, { "epoch": 3.1713810316139766, "grad_norm": 63.57862091064453, "learning_rate": 5e-06, "loss": 0.7865, "num_input_tokens_seen": 59740976, "step": 953 }, { "epoch": 3.1713810316139766, "loss": 0.5665551424026489, "loss_ce": 0.001247492036782205, "loss_iou": 0.1572265625, "loss_num": 0.050048828125, "loss_xval": 0.56640625, "num_input_tokens_seen": 59740976, "step": 953 }, { "epoch": 3.174708818635607, "grad_norm": 34.066715240478516, "learning_rate": 5e-06, "loss": 0.812, "num_input_tokens_seen": 59803580, "step": 954 }, { "epoch": 3.174708818635607, "loss": 0.7728263139724731, "loss_ce": 0.0002432619803585112, "loss_iou": 0.28515625, "loss_num": 0.04052734375, "loss_xval": 0.7734375, "num_input_tokens_seen": 59803580, "step": 954 }, { "epoch": 3.178036605657238, "grad_norm": 31.254560470581055, "learning_rate": 5e-06, "loss": 1.0825, "num_input_tokens_seen": 59865328, "step": 955 }, { "epoch": 3.178036605657238, "loss": 0.8610126972198486, "loss_ce": 0.00017285677313338965, "loss_iou": 0.25, "loss_num": 0.072265625, "loss_xval": 0.859375, "num_input_tokens_seen": 59865328, "step": 955 }, { "epoch": 3.1813643926788684, "grad_norm": 10.921689987182617, "learning_rate": 5e-06, "loss": 1.0658, "num_input_tokens_seen": 59927552, "step": 956 }, { "epoch": 3.1813643926788684, "loss": 1.1395654678344727, "loss_ce": 0.0006494110566563904, "loss_iou": 0.40234375, "loss_num": 0.06689453125, "loss_xval": 1.140625, "num_input_tokens_seen": 59927552, "step": 956 }, { "epoch": 3.184692179700499, "grad_norm": 9.880757331848145, "learning_rate": 5e-06, "loss": 0.8318, "num_input_tokens_seen": 59989664, "step": 957 }, { "epoch": 3.184692179700499, "loss": 0.9110987186431885, "loss_ce": 0.0009424776071682572, "loss_iou": 0.3359375, "loss_num": 0.047607421875, "loss_xval": 0.91015625, "num_input_tokens_seen": 59989664, "step": 957 }, { "epoch": 3.1880199667221296, "grad_norm": 9.38766860961914, "learning_rate": 5e-06, "loss": 0.6643, "num_input_tokens_seen": 60052276, "step": 958 }, { "epoch": 3.1880199667221296, "loss": 0.7620002031326294, "loss_ce": 3.7309764593373984e-05, "loss_iou": 0.25390625, "loss_num": 0.050537109375, "loss_xval": 0.76171875, "num_input_tokens_seen": 60052276, "step": 958 }, { "epoch": 3.1913477537437602, "grad_norm": 16.43718719482422, "learning_rate": 5e-06, "loss": 0.9924, "num_input_tokens_seen": 60116120, "step": 959 }, { "epoch": 3.1913477537437602, "loss": 1.1171211004257202, "loss_ce": 0.0013984288088977337, "loss_iou": 0.375, "loss_num": 0.0732421875, "loss_xval": 1.1171875, "num_input_tokens_seen": 60116120, "step": 959 }, { "epoch": 3.194675540765391, "grad_norm": 12.237974166870117, "learning_rate": 5e-06, "loss": 0.8707, "num_input_tokens_seen": 60179548, "step": 960 }, { "epoch": 3.194675540765391, "loss": 0.8710145950317383, "loss_ce": 0.0006532109691761434, "loss_iou": 0.32421875, "loss_num": 0.044921875, "loss_xval": 0.87109375, "num_input_tokens_seen": 60179548, "step": 960 }, { "epoch": 3.1980033277870215, "grad_norm": 24.791534423828125, "learning_rate": 5e-06, "loss": 0.8278, "num_input_tokens_seen": 60241708, "step": 961 }, { "epoch": 3.1980033277870215, "loss": 0.7581548690795898, "loss_ce": 0.0022954712621867657, "loss_iou": 0.2431640625, "loss_num": 0.053955078125, "loss_xval": 0.7578125, "num_input_tokens_seen": 60241708, "step": 961 }, { "epoch": 3.201331114808652, "grad_norm": 28.264705657958984, "learning_rate": 5e-06, "loss": 0.473, "num_input_tokens_seen": 60304508, "step": 962 }, { "epoch": 3.201331114808652, "loss": 0.5443636178970337, "loss_ce": 0.0008455467177554965, "loss_iou": 0.197265625, "loss_num": 0.02978515625, "loss_xval": 0.54296875, "num_input_tokens_seen": 60304508, "step": 962 }, { "epoch": 3.2046589018302827, "grad_norm": 10.311064720153809, "learning_rate": 5e-06, "loss": 0.6768, "num_input_tokens_seen": 60367092, "step": 963 }, { "epoch": 3.2046589018302827, "loss": 0.8569867610931396, "loss_ce": 5.31335172127001e-05, "loss_iou": 0.287109375, "loss_num": 0.056640625, "loss_xval": 0.85546875, "num_input_tokens_seen": 60367092, "step": 963 }, { "epoch": 3.2079866888519133, "grad_norm": 10.463589668273926, "learning_rate": 5e-06, "loss": 0.838, "num_input_tokens_seen": 60429284, "step": 964 }, { "epoch": 3.2079866888519133, "loss": 0.6484445333480835, "loss_ce": 0.0007394892745651305, "loss_iou": 0.2216796875, "loss_num": 0.040771484375, "loss_xval": 0.6484375, "num_input_tokens_seen": 60429284, "step": 964 }, { "epoch": 3.211314475873544, "grad_norm": 7.585339546203613, "learning_rate": 5e-06, "loss": 0.5347, "num_input_tokens_seen": 60491188, "step": 965 }, { "epoch": 3.211314475873544, "loss": 0.5105918049812317, "loss_ce": 0.0004599463427439332, "loss_iou": 0.1669921875, "loss_num": 0.03515625, "loss_xval": 0.51171875, "num_input_tokens_seen": 60491188, "step": 965 }, { "epoch": 3.2146422628951745, "grad_norm": 9.942543029785156, "learning_rate": 5e-06, "loss": 0.62, "num_input_tokens_seen": 60553992, "step": 966 }, { "epoch": 3.2146422628951745, "loss": 0.46438688039779663, "loss_ce": 0.00039763684617355466, "loss_iou": 0.1435546875, "loss_num": 0.035400390625, "loss_xval": 0.46484375, "num_input_tokens_seen": 60553992, "step": 966 }, { "epoch": 3.217970049916805, "grad_norm": 11.350386619567871, "learning_rate": 5e-06, "loss": 0.887, "num_input_tokens_seen": 60615900, "step": 967 }, { "epoch": 3.217970049916805, "loss": 0.9106756448745728, "loss_ce": 3.116061998298392e-05, "loss_iou": 0.34375, "loss_num": 0.044921875, "loss_xval": 0.91015625, "num_input_tokens_seen": 60615900, "step": 967 }, { "epoch": 3.2212978369384357, "grad_norm": 11.828593254089355, "learning_rate": 5e-06, "loss": 0.9105, "num_input_tokens_seen": 60678052, "step": 968 }, { "epoch": 3.2212978369384357, "loss": 0.7735366225242615, "loss_ce": 9.917082934407517e-05, "loss_iou": 0.2138671875, "loss_num": 0.0693359375, "loss_xval": 0.7734375, "num_input_tokens_seen": 60678052, "step": 968 }, { "epoch": 3.2246256239600664, "grad_norm": 6.765110969543457, "learning_rate": 5e-06, "loss": 0.7719, "num_input_tokens_seen": 60740316, "step": 969 }, { "epoch": 3.2246256239600664, "loss": 1.1397228240966797, "loss_ce": 0.0015393083449453115, "loss_iou": 0.361328125, "loss_num": 0.08349609375, "loss_xval": 1.140625, "num_input_tokens_seen": 60740316, "step": 969 }, { "epoch": 3.227953410981697, "grad_norm": 14.274063110351562, "learning_rate": 5e-06, "loss": 0.7065, "num_input_tokens_seen": 60801452, "step": 970 }, { "epoch": 3.227953410981697, "loss": 0.7572785019874573, "loss_ce": 0.00019845366477966309, "loss_iou": 0.244140625, "loss_num": 0.053466796875, "loss_xval": 0.7578125, "num_input_tokens_seen": 60801452, "step": 970 }, { "epoch": 3.2312811980033276, "grad_norm": 7.189883232116699, "learning_rate": 5e-06, "loss": 0.8128, "num_input_tokens_seen": 60863756, "step": 971 }, { "epoch": 3.2312811980033276, "loss": 0.9057375192642212, "loss_ce": 0.00021990106324665248, "loss_iou": 0.349609375, "loss_num": 0.041259765625, "loss_xval": 0.90625, "num_input_tokens_seen": 60863756, "step": 971 }, { "epoch": 3.234608985024958, "grad_norm": 20.947919845581055, "learning_rate": 5e-06, "loss": 1.029, "num_input_tokens_seen": 60926592, "step": 972 }, { "epoch": 3.234608985024958, "loss": 0.9959653615951538, "loss_ce": 0.0006040430162101984, "loss_iou": 0.322265625, "loss_num": 0.0703125, "loss_xval": 0.99609375, "num_input_tokens_seen": 60926592, "step": 972 }, { "epoch": 3.237936772046589, "grad_norm": 14.039640426635742, "learning_rate": 5e-06, "loss": 0.8004, "num_input_tokens_seen": 60990536, "step": 973 }, { "epoch": 3.237936772046589, "loss": 0.8818901777267456, "loss_ce": 5.422691538115032e-05, "loss_iou": 0.330078125, "loss_num": 0.04443359375, "loss_xval": 0.8828125, "num_input_tokens_seen": 60990536, "step": 973 }, { "epoch": 3.2412645590682194, "grad_norm": 12.025979042053223, "learning_rate": 5e-06, "loss": 0.6277, "num_input_tokens_seen": 61053216, "step": 974 }, { "epoch": 3.2412645590682194, "loss": 0.6128069758415222, "loss_ce": 0.0011126084718853235, "loss_iou": 0.21875, "loss_num": 0.03466796875, "loss_xval": 0.61328125, "num_input_tokens_seen": 61053216, "step": 974 }, { "epoch": 3.24459234608985, "grad_norm": 13.156522750854492, "learning_rate": 5e-06, "loss": 0.7027, "num_input_tokens_seen": 61116740, "step": 975 }, { "epoch": 3.24459234608985, "loss": 0.7115057706832886, "loss_ce": 0.0006903592147864401, "loss_iou": 0.24609375, "loss_num": 0.0439453125, "loss_xval": 0.7109375, "num_input_tokens_seen": 61116740, "step": 975 }, { "epoch": 3.2479201331114806, "grad_norm": 7.883967876434326, "learning_rate": 5e-06, "loss": 0.5578, "num_input_tokens_seen": 61178912, "step": 976 }, { "epoch": 3.2479201331114806, "loss": 0.7233986258506775, "loss_ce": 0.0003761877305805683, "loss_iou": 0.25390625, "loss_num": 0.043212890625, "loss_xval": 0.72265625, "num_input_tokens_seen": 61178912, "step": 976 }, { "epoch": 3.2512479201331113, "grad_norm": 18.929187774658203, "learning_rate": 5e-06, "loss": 0.6357, "num_input_tokens_seen": 61240704, "step": 977 }, { "epoch": 3.2512479201331113, "loss": 0.6345537900924683, "loss_ce": 3.23405911331065e-05, "loss_iou": 0.181640625, "loss_num": 0.054443359375, "loss_xval": 0.6328125, "num_input_tokens_seen": 61240704, "step": 977 }, { "epoch": 3.254575707154742, "grad_norm": 11.569648742675781, "learning_rate": 5e-06, "loss": 0.9763, "num_input_tokens_seen": 61303340, "step": 978 }, { "epoch": 3.254575707154742, "loss": 0.850032389163971, "loss_ce": 0.00042301719076931477, "loss_iou": 0.21484375, "loss_num": 0.083984375, "loss_xval": 0.8515625, "num_input_tokens_seen": 61303340, "step": 978 }, { "epoch": 3.2579034941763725, "grad_norm": 18.81865692138672, "learning_rate": 5e-06, "loss": 0.7637, "num_input_tokens_seen": 61366660, "step": 979 }, { "epoch": 3.2579034941763725, "loss": 0.6620635986328125, "loss_ce": 0.0005646056379191577, "loss_iou": 0.228515625, "loss_num": 0.041015625, "loss_xval": 0.66015625, "num_input_tokens_seen": 61366660, "step": 979 }, { "epoch": 3.261231281198003, "grad_norm": 21.63203239440918, "learning_rate": 5e-06, "loss": 0.6904, "num_input_tokens_seen": 61429692, "step": 980 }, { "epoch": 3.261231281198003, "loss": 0.768730103969574, "loss_ce": 0.000541669491212815, "loss_iou": 0.2578125, "loss_num": 0.051025390625, "loss_xval": 0.76953125, "num_input_tokens_seen": 61429692, "step": 980 }, { "epoch": 3.2645590682196337, "grad_norm": 26.75554656982422, "learning_rate": 5e-06, "loss": 0.9439, "num_input_tokens_seen": 61493484, "step": 981 }, { "epoch": 3.2645590682196337, "loss": 0.9156728982925415, "loss_ce": 2.3398324628942646e-05, "loss_iou": 0.33203125, "loss_num": 0.050537109375, "loss_xval": 0.9140625, "num_input_tokens_seen": 61493484, "step": 981 }, { "epoch": 3.2678868552412643, "grad_norm": 34.384639739990234, "learning_rate": 5e-06, "loss": 1.0029, "num_input_tokens_seen": 61556876, "step": 982 }, { "epoch": 3.2678868552412643, "loss": 1.1158475875854492, "loss_ce": 0.00110146077349782, "loss_iou": 0.34765625, "loss_num": 0.08349609375, "loss_xval": 1.1171875, "num_input_tokens_seen": 61556876, "step": 982 }, { "epoch": 3.271214642262895, "grad_norm": 19.943857192993164, "learning_rate": 5e-06, "loss": 0.9756, "num_input_tokens_seen": 61620480, "step": 983 }, { "epoch": 3.271214642262895, "loss": 1.0187878608703613, "loss_ce": 0.0007214570650830865, "loss_iou": 0.392578125, "loss_num": 0.04638671875, "loss_xval": 1.015625, "num_input_tokens_seen": 61620480, "step": 983 }, { "epoch": 3.2745424292845255, "grad_norm": 17.966175079345703, "learning_rate": 5e-06, "loss": 0.8658, "num_input_tokens_seen": 61684128, "step": 984 }, { "epoch": 3.2745424292845255, "loss": 0.9176982641220093, "loss_ce": 0.0013164564734324813, "loss_iou": 0.291015625, "loss_num": 0.06689453125, "loss_xval": 0.91796875, "num_input_tokens_seen": 61684128, "step": 984 }, { "epoch": 3.277870216306156, "grad_norm": 20.2591552734375, "learning_rate": 5e-06, "loss": 0.9037, "num_input_tokens_seen": 61747004, "step": 985 }, { "epoch": 3.277870216306156, "loss": 1.0413687229156494, "loss_ce": 0.0015739183872938156, "loss_iou": 0.41015625, "loss_num": 0.04345703125, "loss_xval": 1.0390625, "num_input_tokens_seen": 61747004, "step": 985 }, { "epoch": 3.2811980033277868, "grad_norm": 17.5063419342041, "learning_rate": 5e-06, "loss": 0.5624, "num_input_tokens_seen": 61807736, "step": 986 }, { "epoch": 3.2811980033277868, "loss": 0.4131319522857666, "loss_ce": 4.6039051085244864e-05, "loss_iou": 0.10400390625, "loss_num": 0.041015625, "loss_xval": 0.4140625, "num_input_tokens_seen": 61807736, "step": 986 }, { "epoch": 3.284525790349418, "grad_norm": 21.377288818359375, "learning_rate": 5e-06, "loss": 0.8481, "num_input_tokens_seen": 61870936, "step": 987 }, { "epoch": 3.284525790349418, "loss": 0.8774704337120056, "loss_ce": 2.90050265903119e-05, "loss_iou": 0.33203125, "loss_num": 0.042724609375, "loss_xval": 0.87890625, "num_input_tokens_seen": 61870936, "step": 987 }, { "epoch": 3.2878535773710484, "grad_norm": 21.52035903930664, "learning_rate": 5e-06, "loss": 0.5701, "num_input_tokens_seen": 61932556, "step": 988 }, { "epoch": 3.2878535773710484, "loss": 0.8120724558830261, "loss_ce": 0.00030482906731776893, "loss_iou": 0.27734375, "loss_num": 0.051025390625, "loss_xval": 0.8125, "num_input_tokens_seen": 61932556, "step": 988 }, { "epoch": 3.291181364392679, "grad_norm": 18.4642276763916, "learning_rate": 5e-06, "loss": 0.9416, "num_input_tokens_seen": 61996240, "step": 989 }, { "epoch": 3.291181364392679, "loss": 0.9976834058761597, "loss_ce": 0.00036900522536598146, "loss_iou": 0.375, "loss_num": 0.049560546875, "loss_xval": 0.99609375, "num_input_tokens_seen": 61996240, "step": 989 }, { "epoch": 3.2945091514143097, "grad_norm": 12.169829368591309, "learning_rate": 5e-06, "loss": 0.9705, "num_input_tokens_seen": 62058608, "step": 990 }, { "epoch": 3.2945091514143097, "loss": 1.1562477350234985, "loss_ce": 0.0007300969446077943, "loss_iou": 0.396484375, "loss_num": 0.07275390625, "loss_xval": 1.15625, "num_input_tokens_seen": 62058608, "step": 990 }, { "epoch": 3.2978369384359403, "grad_norm": 18.226551055908203, "learning_rate": 5e-06, "loss": 0.8279, "num_input_tokens_seen": 62121952, "step": 991 }, { "epoch": 3.2978369384359403, "loss": 0.7579995393753052, "loss_ce": 6.495913839899004e-05, "loss_iou": 0.26953125, "loss_num": 0.04345703125, "loss_xval": 0.7578125, "num_input_tokens_seen": 62121952, "step": 991 }, { "epoch": 3.301164725457571, "grad_norm": 5.881676197052002, "learning_rate": 5e-06, "loss": 0.4544, "num_input_tokens_seen": 62182960, "step": 992 }, { "epoch": 3.301164725457571, "loss": 0.5944963693618774, "loss_ce": 1.391188743582461e-05, "loss_iou": 0.2158203125, "loss_num": 0.03271484375, "loss_xval": 0.59375, "num_input_tokens_seen": 62182960, "step": 992 }, { "epoch": 3.3044925124792015, "grad_norm": 23.40874671936035, "learning_rate": 5e-06, "loss": 1.0811, "num_input_tokens_seen": 62245376, "step": 993 }, { "epoch": 3.3044925124792015, "loss": 1.324781894683838, "loss_ce": 0.0012955316342413425, "loss_iou": 0.44140625, "loss_num": 0.08740234375, "loss_xval": 1.3203125, "num_input_tokens_seen": 62245376, "step": 993 }, { "epoch": 3.307820299500832, "grad_norm": 16.891769409179688, "learning_rate": 5e-06, "loss": 0.8996, "num_input_tokens_seen": 62308924, "step": 994 }, { "epoch": 3.307820299500832, "loss": 1.0149037837982178, "loss_ce": 0.001720264321193099, "loss_iou": 0.380859375, "loss_num": 0.050048828125, "loss_xval": 1.015625, "num_input_tokens_seen": 62308924, "step": 994 }, { "epoch": 3.3111480865224627, "grad_norm": 8.293322563171387, "learning_rate": 5e-06, "loss": 0.8275, "num_input_tokens_seen": 62368672, "step": 995 }, { "epoch": 3.3111480865224627, "loss": 0.8809036016464233, "loss_ce": 4.425490624271333e-05, "loss_iou": 0.2392578125, "loss_num": 0.08056640625, "loss_xval": 0.8828125, "num_input_tokens_seen": 62368672, "step": 995 }, { "epoch": 3.3144758735440933, "grad_norm": 16.752803802490234, "learning_rate": 5e-06, "loss": 0.7262, "num_input_tokens_seen": 62431712, "step": 996 }, { "epoch": 3.3144758735440933, "loss": 0.7768771052360535, "loss_ce": 2.1639707483700477e-05, "loss_iou": 0.25390625, "loss_num": 0.053955078125, "loss_xval": 0.77734375, "num_input_tokens_seen": 62431712, "step": 996 }, { "epoch": 3.317803660565724, "grad_norm": 15.595096588134766, "learning_rate": 5e-06, "loss": 0.7665, "num_input_tokens_seen": 62493536, "step": 997 }, { "epoch": 3.317803660565724, "loss": 0.8226642608642578, "loss_ce": 0.00015452262596227229, "loss_iou": 0.27734375, "loss_num": 0.053466796875, "loss_xval": 0.82421875, "num_input_tokens_seen": 62493536, "step": 997 }, { "epoch": 3.3211314475873546, "grad_norm": 24.1756649017334, "learning_rate": 5e-06, "loss": 0.785, "num_input_tokens_seen": 62557440, "step": 998 }, { "epoch": 3.3211314475873546, "loss": 0.7211722731590271, "loss_ce": 0.0009574384312145412, "loss_iou": 0.27734375, "loss_num": 0.033447265625, "loss_xval": 0.71875, "num_input_tokens_seen": 62557440, "step": 998 }, { "epoch": 3.324459234608985, "grad_norm": 8.496849060058594, "learning_rate": 5e-06, "loss": 0.8508, "num_input_tokens_seen": 62620908, "step": 999 }, { "epoch": 3.324459234608985, "loss": 0.9880321025848389, "loss_ce": 0.0017039903905242682, "loss_iou": 0.337890625, "loss_num": 0.061767578125, "loss_xval": 0.984375, "num_input_tokens_seen": 62620908, "step": 999 }, { "epoch": 3.327787021630616, "grad_norm": 13.95791244506836, "learning_rate": 5e-06, "loss": 0.8183, "num_input_tokens_seen": 62684036, "step": 1000 }, { "epoch": 3.327787021630616, "eval_seeclick_CIoU": 0.11919866502285004, "eval_seeclick_GIoU": 0.13886269554495811, "eval_seeclick_IoU": 0.22386732697486877, "eval_seeclick_MAE_all": 0.18997914344072342, "eval_seeclick_MAE_h": 0.04969533532857895, "eval_seeclick_MAE_w": 0.14520900696516037, "eval_seeclick_MAE_x_boxes": 0.2827809154987335, "eval_seeclick_MAE_y_boxes": 0.14949627220630646, "eval_seeclick_NUM_probability": 0.9996586740016937, "eval_seeclick_inside_bbox": 0.32083334028720856, "eval_seeclick_loss": 2.743774175643921, "eval_seeclick_loss_ce": 0.07989468798041344, "eval_seeclick_loss_iou": 0.8560791015625, "eval_seeclick_loss_num": 0.1868896484375, "eval_seeclick_loss_xval": 2.6455078125, "eval_seeclick_runtime": 71.1388, "eval_seeclick_samples_per_second": 0.661, "eval_seeclick_steps_per_second": 0.028, "num_input_tokens_seen": 62684036, "step": 1000 }, { "epoch": 3.327787021630616, "eval_icons_CIoU": 0.05206053704023361, "eval_icons_GIoU": 0.16786546260118484, "eval_icons_IoU": 0.20200002193450928, "eval_icons_MAE_all": 0.14681052416563034, "eval_icons_MAE_h": 0.08216147124767303, "eval_icons_MAE_w": 0.1477516144514084, "eval_icons_MAE_x_boxes": 0.1450214460492134, "eval_icons_MAE_y_boxes": 0.04793294984847307, "eval_icons_NUM_probability": 0.999993085861206, "eval_icons_inside_bbox": 0.3888888955116272, "eval_icons_loss": 2.3937511444091797, "eval_icons_loss_ce": 1.3247173171748727e-06, "eval_icons_loss_iou": 0.817138671875, "eval_icons_loss_num": 0.1392059326171875, "eval_icons_loss_xval": 2.33203125, "eval_icons_runtime": 73.4327, "eval_icons_samples_per_second": 0.681, "eval_icons_steps_per_second": 0.027, "num_input_tokens_seen": 62684036, "step": 1000 }, { "epoch": 3.327787021630616, "eval_screenspot_CIoU": 0.05590761390825113, "eval_screenspot_GIoU": 0.1182707001765569, "eval_screenspot_IoU": 0.20136764148871103, "eval_screenspot_MAE_all": 0.19364242255687714, "eval_screenspot_MAE_h": 0.06537577758232753, "eval_screenspot_MAE_w": 0.1785052220026652, "eval_screenspot_MAE_x_boxes": 0.2493977944056193, "eval_screenspot_MAE_y_boxes": 0.13203182319800058, "eval_screenspot_NUM_probability": 0.9998934666315714, "eval_screenspot_inside_bbox": 0.3787499964237213, "eval_screenspot_loss": 2.7400693893432617, "eval_screenspot_loss_ce": 0.00023837910460618636, "eval_screenspot_loss_iou": 0.8929036458333334, "eval_screenspot_loss_num": 0.197662353515625, "eval_screenspot_loss_xval": 2.7724609375, "eval_screenspot_runtime": 118.7252, "eval_screenspot_samples_per_second": 0.75, "eval_screenspot_steps_per_second": 0.025, "num_input_tokens_seen": 62684036, "step": 1000 }, { "epoch": 3.327787021630616, "eval_compot_CIoU": -0.032527330331504345, "eval_compot_GIoU": 0.07551112771034241, "eval_compot_IoU": 0.1327061653137207, "eval_compot_MAE_all": 0.22042576223611832, "eval_compot_MAE_h": 0.06776861473917961, "eval_compot_MAE_w": 0.24602457880973816, "eval_compot_MAE_x_boxes": 0.19308777898550034, "eval_compot_MAE_y_boxes": 0.1478528529405594, "eval_compot_NUM_probability": 0.9999693036079407, "eval_compot_inside_bbox": 0.2395833358168602, "eval_compot_loss": 2.906449556350708, "eval_compot_loss_ce": 0.005752389319241047, "eval_compot_loss_iou": 0.91650390625, "eval_compot_loss_num": 0.211029052734375, "eval_compot_loss_xval": 2.8876953125, "eval_compot_runtime": 67.807, "eval_compot_samples_per_second": 0.737, "eval_compot_steps_per_second": 0.029, "num_input_tokens_seen": 62684036, "step": 1000 }, { "epoch": 3.327787021630616, "eval_custom_ui_MAE_all": 0.08277507498860359, "eval_custom_ui_MAE_x": 0.07837143167853355, "eval_custom_ui_MAE_y": 0.08717871829867363, "eval_custom_ui_NUM_probability": 0.9999922215938568, "eval_custom_ui_loss": 0.4004429578781128, "eval_custom_ui_loss_ce": 4.6469912376778666e-05, "eval_custom_ui_loss_num": 0.0777587890625, "eval_custom_ui_loss_xval": 0.3887939453125, "eval_custom_ui_runtime": 50.8348, "eval_custom_ui_samples_per_second": 0.984, "eval_custom_ui_steps_per_second": 0.039, "num_input_tokens_seen": 62684036, "step": 1000 }, { "epoch": 3.327787021630616, "loss": 0.3899743854999542, "loss_ce": 8.180466102203354e-05, "loss_iou": 0.0, "loss_num": 0.078125, "loss_xval": 0.390625, "num_input_tokens_seen": 62684036, "step": 1000 }, { "epoch": 3.3311148086522464, "grad_norm": 11.472808837890625, "learning_rate": 5e-06, "loss": 0.683, "num_input_tokens_seen": 62745984, "step": 1001 }, { "epoch": 3.3311148086522464, "loss": 0.5718774199485779, "loss_ce": 8.515124136465602e-06, "loss_iou": 0.12451171875, "loss_num": 0.064453125, "loss_xval": 0.5703125, "num_input_tokens_seen": 62745984, "step": 1001 }, { "epoch": 3.334442595673877, "grad_norm": 24.495885848999023, "learning_rate": 5e-06, "loss": 0.8243, "num_input_tokens_seen": 62810288, "step": 1002 }, { "epoch": 3.334442595673877, "loss": 0.7644026279449463, "loss_ce": 0.00012040699948556721, "loss_iou": 0.240234375, "loss_num": 0.056640625, "loss_xval": 0.765625, "num_input_tokens_seen": 62810288, "step": 1002 }, { "epoch": 3.3377703826955076, "grad_norm": 31.466510772705078, "learning_rate": 5e-06, "loss": 0.9726, "num_input_tokens_seen": 62873968, "step": 1003 }, { "epoch": 3.3377703826955076, "loss": 0.8061179518699646, "loss_ce": 0.00045388404396362603, "loss_iou": 0.271484375, "loss_num": 0.05224609375, "loss_xval": 0.8046875, "num_input_tokens_seen": 62873968, "step": 1003 }, { "epoch": 3.3410981697171382, "grad_norm": 17.76407241821289, "learning_rate": 5e-06, "loss": 0.83, "num_input_tokens_seen": 62935872, "step": 1004 }, { "epoch": 3.3410981697171382, "loss": 0.5958296656608582, "loss_ce": 0.001591385342180729, "loss_iou": 0.1513671875, "loss_num": 0.058349609375, "loss_xval": 0.59375, "num_input_tokens_seen": 62935872, "step": 1004 }, { "epoch": 3.344425956738769, "grad_norm": 19.567127227783203, "learning_rate": 5e-06, "loss": 0.8515, "num_input_tokens_seen": 62996516, "step": 1005 }, { "epoch": 3.344425956738769, "loss": 0.9327386617660522, "loss_ce": 0.00036568206269294024, "loss_iou": 0.31640625, "loss_num": 0.060302734375, "loss_xval": 0.93359375, "num_input_tokens_seen": 62996516, "step": 1005 }, { "epoch": 3.3477537437603995, "grad_norm": 28.632431030273438, "learning_rate": 5e-06, "loss": 0.9322, "num_input_tokens_seen": 63059556, "step": 1006 }, { "epoch": 3.3477537437603995, "loss": 0.8702507615089417, "loss_ce": 0.000133581503177993, "loss_iou": 0.2890625, "loss_num": 0.05859375, "loss_xval": 0.87109375, "num_input_tokens_seen": 63059556, "step": 1006 }, { "epoch": 3.35108153078203, "grad_norm": 13.562311172485352, "learning_rate": 5e-06, "loss": 0.6644, "num_input_tokens_seen": 63122292, "step": 1007 }, { "epoch": 3.35108153078203, "loss": 0.6380650401115417, "loss_ce": 0.00018663291120901704, "loss_iou": 0.1826171875, "loss_num": 0.054443359375, "loss_xval": 0.63671875, "num_input_tokens_seen": 63122292, "step": 1007 }, { "epoch": 3.3544093178036607, "grad_norm": 12.887714385986328, "learning_rate": 5e-06, "loss": 0.5642, "num_input_tokens_seen": 63185440, "step": 1008 }, { "epoch": 3.3544093178036607, "loss": 0.5903370380401611, "loss_ce": 0.0017139973351731896, "loss_iou": 0.2109375, "loss_num": 0.033447265625, "loss_xval": 0.58984375, "num_input_tokens_seen": 63185440, "step": 1008 }, { "epoch": 3.3577371048252913, "grad_norm": 23.336519241333008, "learning_rate": 5e-06, "loss": 0.8652, "num_input_tokens_seen": 63248932, "step": 1009 }, { "epoch": 3.3577371048252913, "loss": 0.8400235176086426, "loss_ce": 0.00042392255272716284, "loss_iou": 0.265625, "loss_num": 0.0615234375, "loss_xval": 0.83984375, "num_input_tokens_seen": 63248932, "step": 1009 }, { "epoch": 3.361064891846922, "grad_norm": 9.43217945098877, "learning_rate": 5e-06, "loss": 0.7828, "num_input_tokens_seen": 63311068, "step": 1010 }, { "epoch": 3.361064891846922, "loss": 0.7335468530654907, "loss_ce": 0.00039258040487766266, "loss_iou": 0.25390625, "loss_num": 0.04541015625, "loss_xval": 0.734375, "num_input_tokens_seen": 63311068, "step": 1010 }, { "epoch": 3.3643926788685525, "grad_norm": 14.894099235534668, "learning_rate": 5e-06, "loss": 0.8479, "num_input_tokens_seen": 63373624, "step": 1011 }, { "epoch": 3.3643926788685525, "loss": 0.9643365144729614, "loss_ce": 0.0007134462357498705, "loss_iou": 0.380859375, "loss_num": 0.0400390625, "loss_xval": 0.96484375, "num_input_tokens_seen": 63373624, "step": 1011 }, { "epoch": 3.367720465890183, "grad_norm": 21.609052658081055, "learning_rate": 5e-06, "loss": 0.6569, "num_input_tokens_seen": 63435304, "step": 1012 }, { "epoch": 3.367720465890183, "loss": 0.5942494869232178, "loss_ce": 1.1208974683540873e-05, "loss_iou": 0.1826171875, "loss_num": 0.0458984375, "loss_xval": 0.59375, "num_input_tokens_seen": 63435304, "step": 1012 }, { "epoch": 3.3710482529118138, "grad_norm": 17.91049575805664, "learning_rate": 5e-06, "loss": 0.8687, "num_input_tokens_seen": 63496096, "step": 1013 }, { "epoch": 3.3710482529118138, "loss": 0.6522290706634521, "loss_ce": 0.00025154344621114433, "loss_iou": 0.16015625, "loss_num": 0.06591796875, "loss_xval": 0.65234375, "num_input_tokens_seen": 63496096, "step": 1013 }, { "epoch": 3.3743760399334444, "grad_norm": 4.573009490966797, "learning_rate": 5e-06, "loss": 0.5975, "num_input_tokens_seen": 63557060, "step": 1014 }, { "epoch": 3.3743760399334444, "loss": 0.42771950364112854, "loss_ce": 0.0002292850404046476, "loss_iou": 0.01611328125, "loss_num": 0.0791015625, "loss_xval": 0.427734375, "num_input_tokens_seen": 63557060, "step": 1014 }, { "epoch": 3.377703826955075, "grad_norm": 13.058093070983887, "learning_rate": 5e-06, "loss": 0.6873, "num_input_tokens_seen": 63619988, "step": 1015 }, { "epoch": 3.377703826955075, "loss": 0.540930986404419, "loss_ce": 3.743675188161433e-05, "loss_iou": 0.16796875, "loss_num": 0.041015625, "loss_xval": 0.5390625, "num_input_tokens_seen": 63619988, "step": 1015 }, { "epoch": 3.3810316139767056, "grad_norm": 10.773247718811035, "learning_rate": 5e-06, "loss": 0.9057, "num_input_tokens_seen": 63683036, "step": 1016 }, { "epoch": 3.3810316139767056, "loss": 0.6826450228691101, "loss_ce": 2.7821590265375562e-05, "loss_iou": 0.25390625, "loss_num": 0.035400390625, "loss_xval": 0.68359375, "num_input_tokens_seen": 63683036, "step": 1016 }, { "epoch": 3.384359400998336, "grad_norm": 11.654807090759277, "learning_rate": 5e-06, "loss": 0.6626, "num_input_tokens_seen": 63745488, "step": 1017 }, { "epoch": 3.384359400998336, "loss": 0.5884538888931274, "loss_ce": 7.501640357077122e-05, "loss_iou": 0.1416015625, "loss_num": 0.061279296875, "loss_xval": 0.58984375, "num_input_tokens_seen": 63745488, "step": 1017 }, { "epoch": 3.387687188019967, "grad_norm": 32.220428466796875, "learning_rate": 5e-06, "loss": 0.7153, "num_input_tokens_seen": 63808984, "step": 1018 }, { "epoch": 3.387687188019967, "loss": 0.7895395755767822, "loss_ce": 0.0002329161943634972, "loss_iou": 0.279296875, "loss_num": 0.0458984375, "loss_xval": 0.7890625, "num_input_tokens_seen": 63808984, "step": 1018 }, { "epoch": 3.3910149750415974, "grad_norm": 9.35084056854248, "learning_rate": 5e-06, "loss": 0.7474, "num_input_tokens_seen": 63869352, "step": 1019 }, { "epoch": 3.3910149750415974, "loss": 0.9051174521446228, "loss_ce": 8.812104351818562e-05, "loss_iou": 0.30078125, "loss_num": 0.06005859375, "loss_xval": 0.90625, "num_input_tokens_seen": 63869352, "step": 1019 }, { "epoch": 3.394342762063228, "grad_norm": 21.432973861694336, "learning_rate": 5e-06, "loss": 0.7779, "num_input_tokens_seen": 63931168, "step": 1020 }, { "epoch": 3.394342762063228, "loss": 0.8875124454498291, "loss_ce": 0.00018336158245801926, "loss_iou": 0.328125, "loss_num": 0.046142578125, "loss_xval": 0.88671875, "num_input_tokens_seen": 63931168, "step": 1020 }, { "epoch": 3.3976705490848587, "grad_norm": 16.27943992614746, "learning_rate": 5e-06, "loss": 0.8919, "num_input_tokens_seen": 63994276, "step": 1021 }, { "epoch": 3.3976705490848587, "loss": 0.7104352116584778, "loss_ce": 0.0004742466553580016, "loss_iou": 0.2734375, "loss_num": 0.032958984375, "loss_xval": 0.7109375, "num_input_tokens_seen": 63994276, "step": 1021 }, { "epoch": 3.4009983361064893, "grad_norm": 18.430397033691406, "learning_rate": 5e-06, "loss": 0.8877, "num_input_tokens_seen": 64056064, "step": 1022 }, { "epoch": 3.4009983361064893, "loss": 0.7179223299026489, "loss_ce": 0.0005151316290721297, "loss_iou": 0.1982421875, "loss_num": 0.06396484375, "loss_xval": 0.71875, "num_input_tokens_seen": 64056064, "step": 1022 }, { "epoch": 3.40432612312812, "grad_norm": 10.066780090332031, "learning_rate": 5e-06, "loss": 1.0067, "num_input_tokens_seen": 64119668, "step": 1023 }, { "epoch": 3.40432612312812, "loss": 0.8341488838195801, "loss_ce": 4.246793105266988e-05, "loss_iou": 0.296875, "loss_num": 0.048095703125, "loss_xval": 0.8359375, "num_input_tokens_seen": 64119668, "step": 1023 }, { "epoch": 3.4076539101497505, "grad_norm": 7.158484935760498, "learning_rate": 5e-06, "loss": 0.9016, "num_input_tokens_seen": 64183132, "step": 1024 }, { "epoch": 3.4076539101497505, "loss": 0.7723921537399292, "loss_ce": 0.0006635930621996522, "loss_iou": 0.27734375, "loss_num": 0.043701171875, "loss_xval": 0.7734375, "num_input_tokens_seen": 64183132, "step": 1024 }, { "epoch": 3.410981697171381, "grad_norm": 23.170434951782227, "learning_rate": 5e-06, "loss": 0.985, "num_input_tokens_seen": 64244404, "step": 1025 }, { "epoch": 3.410981697171381, "loss": 0.9742788076400757, "loss_ce": 3.5607161407824606e-05, "loss_iou": 0.30859375, "loss_num": 0.0712890625, "loss_xval": 0.97265625, "num_input_tokens_seen": 64244404, "step": 1025 }, { "epoch": 3.4143094841930117, "grad_norm": 15.608352661132812, "learning_rate": 5e-06, "loss": 0.764, "num_input_tokens_seen": 64307508, "step": 1026 }, { "epoch": 3.4143094841930117, "loss": 0.7705492973327637, "loss_ce": 0.00016356556443497539, "loss_iou": 0.291015625, "loss_num": 0.037353515625, "loss_xval": 0.76953125, "num_input_tokens_seen": 64307508, "step": 1026 }, { "epoch": 3.4176372712146423, "grad_norm": 22.033626556396484, "learning_rate": 5e-06, "loss": 0.724, "num_input_tokens_seen": 64369472, "step": 1027 }, { "epoch": 3.4176372712146423, "loss": 0.7757673859596252, "loss_ce": 0.0007429937249980867, "loss_iou": 0.244140625, "loss_num": 0.057373046875, "loss_xval": 0.7734375, "num_input_tokens_seen": 64369472, "step": 1027 }, { "epoch": 3.420965058236273, "grad_norm": 33.458152770996094, "learning_rate": 5e-06, "loss": 0.9622, "num_input_tokens_seen": 64431116, "step": 1028 }, { "epoch": 3.420965058236273, "loss": 1.2742621898651123, "loss_ce": 0.0007026524399407208, "loss_iou": 0.41015625, "loss_num": 0.09033203125, "loss_xval": 1.2734375, "num_input_tokens_seen": 64431116, "step": 1028 }, { "epoch": 3.4242928452579036, "grad_norm": 10.938355445861816, "learning_rate": 5e-06, "loss": 0.9922, "num_input_tokens_seen": 64491820, "step": 1029 }, { "epoch": 3.4242928452579036, "loss": 1.0337855815887451, "loss_ce": 0.00033826506114564836, "loss_iou": 0.291015625, "loss_num": 0.08984375, "loss_xval": 1.03125, "num_input_tokens_seen": 64491820, "step": 1029 }, { "epoch": 3.427620632279534, "grad_norm": 14.789898872375488, "learning_rate": 5e-06, "loss": 0.8743, "num_input_tokens_seen": 64555604, "step": 1030 }, { "epoch": 3.427620632279534, "loss": 0.8684331178665161, "loss_ce": 2.4945697077782825e-05, "loss_iou": 0.337890625, "loss_num": 0.038818359375, "loss_xval": 0.8671875, "num_input_tokens_seen": 64555604, "step": 1030 }, { "epoch": 3.430948419301165, "grad_norm": 13.57603645324707, "learning_rate": 5e-06, "loss": 0.7378, "num_input_tokens_seen": 64617848, "step": 1031 }, { "epoch": 3.430948419301165, "loss": 0.9734039902687073, "loss_ce": 0.0007477752515114844, "loss_iou": 0.31640625, "loss_num": 0.06787109375, "loss_xval": 0.97265625, "num_input_tokens_seen": 64617848, "step": 1031 }, { "epoch": 3.4342762063227954, "grad_norm": 7.267496109008789, "learning_rate": 5e-06, "loss": 0.8297, "num_input_tokens_seen": 64678864, "step": 1032 }, { "epoch": 3.4342762063227954, "loss": 0.744103193283081, "loss_ce": 0.0006950664101168513, "loss_iou": 0.173828125, "loss_num": 0.0791015625, "loss_xval": 0.7421875, "num_input_tokens_seen": 64678864, "step": 1032 }, { "epoch": 3.437603993344426, "grad_norm": 9.682428359985352, "learning_rate": 5e-06, "loss": 0.9113, "num_input_tokens_seen": 64741056, "step": 1033 }, { "epoch": 3.437603993344426, "loss": 0.7183736562728882, "loss_ce": 0.0008443902479484677, "loss_iou": 0.22265625, "loss_num": 0.0546875, "loss_xval": 0.71875, "num_input_tokens_seen": 64741056, "step": 1033 }, { "epoch": 3.4409317803660566, "grad_norm": 11.420270919799805, "learning_rate": 5e-06, "loss": 0.8273, "num_input_tokens_seen": 64803852, "step": 1034 }, { "epoch": 3.4409317803660566, "loss": 0.8885650038719177, "loss_ce": 0.002090380061417818, "loss_iou": 0.283203125, "loss_num": 0.064453125, "loss_xval": 0.88671875, "num_input_tokens_seen": 64803852, "step": 1034 }, { "epoch": 3.4442595673876872, "grad_norm": 10.965718269348145, "learning_rate": 5e-06, "loss": 0.9267, "num_input_tokens_seen": 64865460, "step": 1035 }, { "epoch": 3.4442595673876872, "loss": 0.7865220308303833, "loss_ce": 2.2979209461482242e-05, "loss_iou": 0.255859375, "loss_num": 0.054931640625, "loss_xval": 0.78515625, "num_input_tokens_seen": 64865460, "step": 1035 }, { "epoch": 3.447587354409318, "grad_norm": 12.703821182250977, "learning_rate": 5e-06, "loss": 0.8564, "num_input_tokens_seen": 64929860, "step": 1036 }, { "epoch": 3.447587354409318, "loss": 0.8818983435630798, "loss_ce": 0.0012830996420234442, "loss_iou": 0.349609375, "loss_num": 0.036376953125, "loss_xval": 0.87890625, "num_input_tokens_seen": 64929860, "step": 1036 }, { "epoch": 3.4509151414309485, "grad_norm": 12.575376510620117, "learning_rate": 5e-06, "loss": 0.8592, "num_input_tokens_seen": 64991228, "step": 1037 }, { "epoch": 3.4509151414309485, "loss": 0.5446336269378662, "loss_ce": 0.00010847233352251351, "loss_iou": 0.134765625, "loss_num": 0.054931640625, "loss_xval": 0.54296875, "num_input_tokens_seen": 64991228, "step": 1037 }, { "epoch": 3.454242928452579, "grad_norm": 18.06930160522461, "learning_rate": 5e-06, "loss": 0.8297, "num_input_tokens_seen": 65052856, "step": 1038 }, { "epoch": 3.454242928452579, "loss": 0.7931275963783264, "loss_ce": 0.0008912733173929155, "loss_iou": 0.255859375, "loss_num": 0.05615234375, "loss_xval": 0.79296875, "num_input_tokens_seen": 65052856, "step": 1038 }, { "epoch": 3.4575707154742097, "grad_norm": 38.98135757446289, "learning_rate": 5e-06, "loss": 0.8898, "num_input_tokens_seen": 65115964, "step": 1039 }, { "epoch": 3.4575707154742097, "loss": 0.8223487138748169, "loss_ce": 8.309633994940668e-05, "loss_iou": 0.32421875, "loss_num": 0.034912109375, "loss_xval": 0.8203125, "num_input_tokens_seen": 65115964, "step": 1039 }, { "epoch": 3.4608985024958403, "grad_norm": 24.175413131713867, "learning_rate": 5e-06, "loss": 0.8058, "num_input_tokens_seen": 65178592, "step": 1040 }, { "epoch": 3.4608985024958403, "loss": 0.6951857209205627, "loss_ce": 0.005732577759772539, "loss_iou": 0.2353515625, "loss_num": 0.043701171875, "loss_xval": 0.6875, "num_input_tokens_seen": 65178592, "step": 1040 }, { "epoch": 3.464226289517471, "grad_norm": 12.473821640014648, "learning_rate": 5e-06, "loss": 0.4723, "num_input_tokens_seen": 65241096, "step": 1041 }, { "epoch": 3.464226289517471, "loss": 0.5358234643936157, "loss_ce": 0.00042302411748096347, "loss_iou": 0.1982421875, "loss_num": 0.02783203125, "loss_xval": 0.53515625, "num_input_tokens_seen": 65241096, "step": 1041 }, { "epoch": 3.4675540765391015, "grad_norm": 7.259097576141357, "learning_rate": 5e-06, "loss": 0.6688, "num_input_tokens_seen": 65304704, "step": 1042 }, { "epoch": 3.4675540765391015, "loss": 0.8926037549972534, "loss_ce": 0.0016125383554026484, "loss_iou": 0.31640625, "loss_num": 0.0517578125, "loss_xval": 0.890625, "num_input_tokens_seen": 65304704, "step": 1042 }, { "epoch": 3.470881863560732, "grad_norm": 9.943862915039062, "learning_rate": 5e-06, "loss": 0.7162, "num_input_tokens_seen": 65366024, "step": 1043 }, { "epoch": 3.470881863560732, "loss": 0.4994853138923645, "loss_ce": 0.00033979519503191113, "loss_iou": 0.1591796875, "loss_num": 0.036376953125, "loss_xval": 0.5, "num_input_tokens_seen": 65366024, "step": 1043 }, { "epoch": 3.4742096505823628, "grad_norm": 11.191426277160645, "learning_rate": 5e-06, "loss": 0.5539, "num_input_tokens_seen": 65426728, "step": 1044 }, { "epoch": 3.4742096505823628, "loss": 0.4565603733062744, "loss_ce": 0.00013946183025836945, "loss_iou": 0.14453125, "loss_num": 0.03369140625, "loss_xval": 0.45703125, "num_input_tokens_seen": 65426728, "step": 1044 }, { "epoch": 3.4775374376039934, "grad_norm": 13.661975860595703, "learning_rate": 5e-06, "loss": 0.7925, "num_input_tokens_seen": 65489952, "step": 1045 }, { "epoch": 3.4775374376039934, "loss": 0.9170857071876526, "loss_ce": 9.353942004963756e-05, "loss_iou": 0.345703125, "loss_num": 0.044921875, "loss_xval": 0.91796875, "num_input_tokens_seen": 65489952, "step": 1045 }, { "epoch": 3.480865224625624, "grad_norm": 12.216934204101562, "learning_rate": 5e-06, "loss": 0.8358, "num_input_tokens_seen": 65553428, "step": 1046 }, { "epoch": 3.480865224625624, "loss": 0.9110158085823059, "loss_ce": 0.00037124729715287685, "loss_iou": 0.353515625, "loss_num": 0.040771484375, "loss_xval": 0.91015625, "num_input_tokens_seen": 65553428, "step": 1046 }, { "epoch": 3.4841930116472546, "grad_norm": 12.635567665100098, "learning_rate": 5e-06, "loss": 0.7208, "num_input_tokens_seen": 65617964, "step": 1047 }, { "epoch": 3.4841930116472546, "loss": 0.6810963153839111, "loss_ce": 0.0006764110876247287, "loss_iou": 0.2490234375, "loss_num": 0.036376953125, "loss_xval": 0.6796875, "num_input_tokens_seen": 65617964, "step": 1047 }, { "epoch": 3.487520798668885, "grad_norm": 15.042474746704102, "learning_rate": 5e-06, "loss": 0.9429, "num_input_tokens_seen": 65681980, "step": 1048 }, { "epoch": 3.487520798668885, "loss": 1.086167573928833, "loss_ce": 0.001694836188107729, "loss_iou": 0.34765625, "loss_num": 0.078125, "loss_xval": 1.0859375, "num_input_tokens_seen": 65681980, "step": 1048 }, { "epoch": 3.490848585690516, "grad_norm": 19.812339782714844, "learning_rate": 5e-06, "loss": 0.6651, "num_input_tokens_seen": 65743944, "step": 1049 }, { "epoch": 3.490848585690516, "loss": 0.5148214101791382, "loss_ce": 5.08911361976061e-05, "loss_iou": 0.1455078125, "loss_num": 0.044921875, "loss_xval": 0.515625, "num_input_tokens_seen": 65743944, "step": 1049 }, { "epoch": 3.4941763727121464, "grad_norm": 15.855693817138672, "learning_rate": 5e-06, "loss": 0.5802, "num_input_tokens_seen": 65804424, "step": 1050 }, { "epoch": 3.4941763727121464, "loss": 0.6694477796554565, "loss_ce": 0.0002583569148555398, "loss_iou": 0.189453125, "loss_num": 0.05810546875, "loss_xval": 0.66796875, "num_input_tokens_seen": 65804424, "step": 1050 }, { "epoch": 3.497504159733777, "grad_norm": 12.423200607299805, "learning_rate": 5e-06, "loss": 0.8533, "num_input_tokens_seen": 65866992, "step": 1051 }, { "epoch": 3.497504159733777, "loss": 0.8513665199279785, "loss_ce": 4.8082820285344496e-05, "loss_iou": 0.296875, "loss_num": 0.052001953125, "loss_xval": 0.8515625, "num_input_tokens_seen": 65866992, "step": 1051 }, { "epoch": 3.5008319467554077, "grad_norm": 14.53134536743164, "learning_rate": 5e-06, "loss": 0.7817, "num_input_tokens_seen": 65930468, "step": 1052 }, { "epoch": 3.5008319467554077, "loss": 0.7512681484222412, "loss_ce": 0.0007798465667292476, "loss_iou": 0.2734375, "loss_num": 0.040771484375, "loss_xval": 0.75, "num_input_tokens_seen": 65930468, "step": 1052 }, { "epoch": 3.5041597337770383, "grad_norm": 19.564502716064453, "learning_rate": 5e-06, "loss": 0.8821, "num_input_tokens_seen": 65994552, "step": 1053 }, { "epoch": 3.5041597337770383, "loss": 1.1724635362625122, "loss_ce": 0.0010767867788672447, "loss_iou": 0.408203125, "loss_num": 0.0712890625, "loss_xval": 1.171875, "num_input_tokens_seen": 65994552, "step": 1053 }, { "epoch": 3.507487520798669, "grad_norm": 13.443198204040527, "learning_rate": 5e-06, "loss": 0.7527, "num_input_tokens_seen": 66057744, "step": 1054 }, { "epoch": 3.507487520798669, "loss": 0.7170261740684509, "loss_ce": 0.00047345724306069314, "loss_iou": 0.267578125, "loss_num": 0.036376953125, "loss_xval": 0.71484375, "num_input_tokens_seen": 66057744, "step": 1054 }, { "epoch": 3.5108153078202995, "grad_norm": 14.912182807922363, "learning_rate": 5e-06, "loss": 0.7259, "num_input_tokens_seen": 66121440, "step": 1055 }, { "epoch": 3.5108153078202995, "loss": 0.6320114135742188, "loss_ce": 0.00017547918832860887, "loss_iou": 0.2451171875, "loss_num": 0.0281982421875, "loss_xval": 0.6328125, "num_input_tokens_seen": 66121440, "step": 1055 }, { "epoch": 3.51414309484193, "grad_norm": 24.48227310180664, "learning_rate": 5e-06, "loss": 0.8422, "num_input_tokens_seen": 66184688, "step": 1056 }, { "epoch": 3.51414309484193, "loss": 0.7655551433563232, "loss_ce": 0.0006625619134865701, "loss_iou": 0.263671875, "loss_num": 0.04736328125, "loss_xval": 0.765625, "num_input_tokens_seen": 66184688, "step": 1056 }, { "epoch": 3.5174708818635607, "grad_norm": 12.773367881774902, "learning_rate": 5e-06, "loss": 0.7156, "num_input_tokens_seen": 66247292, "step": 1057 }, { "epoch": 3.5174708818635607, "loss": 0.5347628593444824, "loss_ce": 0.00046112615382298827, "loss_iou": 0.1669921875, "loss_num": 0.039794921875, "loss_xval": 0.53515625, "num_input_tokens_seen": 66247292, "step": 1057 }, { "epoch": 3.5207986688851913, "grad_norm": 11.14990234375, "learning_rate": 5e-06, "loss": 0.5024, "num_input_tokens_seen": 66310532, "step": 1058 }, { "epoch": 3.5207986688851913, "loss": 0.6813678741455078, "loss_ce": 0.0007648678729310632, "loss_iou": 0.236328125, "loss_num": 0.04150390625, "loss_xval": 0.6796875, "num_input_tokens_seen": 66310532, "step": 1058 }, { "epoch": 3.524126455906822, "grad_norm": 16.360013961791992, "learning_rate": 5e-06, "loss": 0.9234, "num_input_tokens_seen": 66373648, "step": 1059 }, { "epoch": 3.524126455906822, "loss": 0.9731760025024414, "loss_ce": 3.146879680571146e-05, "loss_iou": 0.345703125, "loss_num": 0.056396484375, "loss_xval": 0.97265625, "num_input_tokens_seen": 66373648, "step": 1059 }, { "epoch": 3.5274542429284526, "grad_norm": 9.46167278289795, "learning_rate": 5e-06, "loss": 0.4595, "num_input_tokens_seen": 66436316, "step": 1060 }, { "epoch": 3.5274542429284526, "loss": 0.5476481914520264, "loss_ce": 4.0785591409076005e-05, "loss_iou": 0.197265625, "loss_num": 0.03076171875, "loss_xval": 0.546875, "num_input_tokens_seen": 66436316, "step": 1060 }, { "epoch": 3.530782029950083, "grad_norm": 14.10741901397705, "learning_rate": 5e-06, "loss": 0.9308, "num_input_tokens_seen": 66498344, "step": 1061 }, { "epoch": 3.530782029950083, "loss": 0.9000440835952759, "loss_ce": 0.002461038064211607, "loss_iou": 0.310546875, "loss_num": 0.054931640625, "loss_xval": 0.8984375, "num_input_tokens_seen": 66498344, "step": 1061 }, { "epoch": 3.534109816971714, "grad_norm": 23.609783172607422, "learning_rate": 5e-06, "loss": 0.9126, "num_input_tokens_seen": 66562436, "step": 1062 }, { "epoch": 3.534109816971714, "loss": 0.7102041840553284, "loss_ce": 0.00036530819488689303, "loss_iou": 0.248046875, "loss_num": 0.04248046875, "loss_xval": 0.7109375, "num_input_tokens_seen": 66562436, "step": 1062 }, { "epoch": 3.5374376039933444, "grad_norm": 20.392234802246094, "learning_rate": 5e-06, "loss": 0.5586, "num_input_tokens_seen": 66624320, "step": 1063 }, { "epoch": 3.5374376039933444, "loss": 0.573874831199646, "loss_ce": 0.0008768028346821666, "loss_iou": 0.1318359375, "loss_num": 0.061767578125, "loss_xval": 0.57421875, "num_input_tokens_seen": 66624320, "step": 1063 }, { "epoch": 3.540765391014975, "grad_norm": 14.3955078125, "learning_rate": 5e-06, "loss": 0.772, "num_input_tokens_seen": 66687376, "step": 1064 }, { "epoch": 3.540765391014975, "loss": 0.6318628787994385, "loss_ce": 2.6940453608403914e-05, "loss_iou": 0.1611328125, "loss_num": 0.061767578125, "loss_xval": 0.6328125, "num_input_tokens_seen": 66687376, "step": 1064 }, { "epoch": 3.5440931780366056, "grad_norm": 16.033170700073242, "learning_rate": 5e-06, "loss": 0.6708, "num_input_tokens_seen": 66751300, "step": 1065 }, { "epoch": 3.5440931780366056, "loss": 0.6771686673164368, "loss_ce": 0.0011432725004851818, "loss_iou": 0.25390625, "loss_num": 0.033935546875, "loss_xval": 0.67578125, "num_input_tokens_seen": 66751300, "step": 1065 }, { "epoch": 3.5474209650582362, "grad_norm": 19.82112693786621, "learning_rate": 5e-06, "loss": 1.0317, "num_input_tokens_seen": 66814000, "step": 1066 }, { "epoch": 3.5474209650582362, "loss": 1.0494093894958496, "loss_ce": 9.300906822318211e-05, "loss_iou": 0.333984375, "loss_num": 0.07568359375, "loss_xval": 1.046875, "num_input_tokens_seen": 66814000, "step": 1066 }, { "epoch": 3.550748752079867, "grad_norm": 25.609535217285156, "learning_rate": 5e-06, "loss": 0.7597, "num_input_tokens_seen": 66877452, "step": 1067 }, { "epoch": 3.550748752079867, "loss": 0.6766536831855774, "loss_ce": 0.0016658806707710028, "loss_iou": 0.240234375, "loss_num": 0.0390625, "loss_xval": 0.67578125, "num_input_tokens_seen": 66877452, "step": 1067 }, { "epoch": 3.5540765391014975, "grad_norm": 17.325336456298828, "learning_rate": 5e-06, "loss": 0.6623, "num_input_tokens_seen": 66937972, "step": 1068 }, { "epoch": 3.5540765391014975, "loss": 0.830532431602478, "loss_ce": 0.0003322733100503683, "loss_iou": 0.21875, "loss_num": 0.07861328125, "loss_xval": 0.83203125, "num_input_tokens_seen": 66937972, "step": 1068 }, { "epoch": 3.557404326123128, "grad_norm": 22.54002571105957, "learning_rate": 5e-06, "loss": 0.8791, "num_input_tokens_seen": 67000040, "step": 1069 }, { "epoch": 3.557404326123128, "loss": 1.1053575277328491, "loss_ce": 0.00037706823786720634, "loss_iou": 0.34375, "loss_num": 0.08349609375, "loss_xval": 1.1015625, "num_input_tokens_seen": 67000040, "step": 1069 }, { "epoch": 3.5607321131447587, "grad_norm": 9.65036678314209, "learning_rate": 5e-06, "loss": 0.699, "num_input_tokens_seen": 67062404, "step": 1070 }, { "epoch": 3.5607321131447587, "loss": 0.52567458152771, "loss_ce": 3.979210669058375e-05, "loss_iou": 0.158203125, "loss_num": 0.041748046875, "loss_xval": 0.52734375, "num_input_tokens_seen": 67062404, "step": 1070 }, { "epoch": 3.5640599001663893, "grad_norm": 9.910517692565918, "learning_rate": 5e-06, "loss": 0.7048, "num_input_tokens_seen": 67124276, "step": 1071 }, { "epoch": 3.5640599001663893, "loss": 0.6014664173126221, "loss_ce": 2.5981264116126113e-05, "loss_iou": 0.1875, "loss_num": 0.045166015625, "loss_xval": 0.6015625, "num_input_tokens_seen": 67124276, "step": 1071 }, { "epoch": 3.56738768718802, "grad_norm": 15.538228034973145, "learning_rate": 5e-06, "loss": 0.9398, "num_input_tokens_seen": 67187400, "step": 1072 }, { "epoch": 3.56738768718802, "loss": 1.0962104797363281, "loss_ce": 1.9056995370192453e-05, "loss_iou": 0.376953125, "loss_num": 0.068359375, "loss_xval": 1.09375, "num_input_tokens_seen": 67187400, "step": 1072 }, { "epoch": 3.5707154742096505, "grad_norm": 9.4950532913208, "learning_rate": 5e-06, "loss": 0.879, "num_input_tokens_seen": 67249424, "step": 1073 }, { "epoch": 3.5707154742096505, "loss": 0.9734940528869629, "loss_ce": 0.00010540573566686362, "loss_iou": 0.306640625, "loss_num": 0.07177734375, "loss_xval": 0.97265625, "num_input_tokens_seen": 67249424, "step": 1073 }, { "epoch": 3.574043261231281, "grad_norm": 18.527990341186523, "learning_rate": 5e-06, "loss": 0.8317, "num_input_tokens_seen": 67313036, "step": 1074 }, { "epoch": 3.574043261231281, "loss": 0.6008124947547913, "loss_ce": 0.0003486633358988911, "loss_iou": 0.185546875, "loss_num": 0.045654296875, "loss_xval": 0.6015625, "num_input_tokens_seen": 67313036, "step": 1074 }, { "epoch": 3.5773710482529117, "grad_norm": 26.33303451538086, "learning_rate": 5e-06, "loss": 0.8054, "num_input_tokens_seen": 67375220, "step": 1075 }, { "epoch": 3.5773710482529117, "loss": 0.8885135054588318, "loss_ce": 0.0008181866724044085, "loss_iou": 0.306640625, "loss_num": 0.054931640625, "loss_xval": 0.88671875, "num_input_tokens_seen": 67375220, "step": 1075 }, { "epoch": 3.5806988352745424, "grad_norm": 13.492464065551758, "learning_rate": 5e-06, "loss": 0.7742, "num_input_tokens_seen": 67436772, "step": 1076 }, { "epoch": 3.5806988352745424, "loss": 0.775583803653717, "loss_ce": 0.00019318400882184505, "loss_iou": 0.2294921875, "loss_num": 0.06298828125, "loss_xval": 0.7734375, "num_input_tokens_seen": 67436772, "step": 1076 }, { "epoch": 3.584026622296173, "grad_norm": 13.470839500427246, "learning_rate": 5e-06, "loss": 0.7942, "num_input_tokens_seen": 67498376, "step": 1077 }, { "epoch": 3.584026622296173, "loss": 0.5680028200149536, "loss_ce": 9.682446034275927e-06, "loss_iou": 0.1904296875, "loss_num": 0.03759765625, "loss_xval": 0.56640625, "num_input_tokens_seen": 67498376, "step": 1077 }, { "epoch": 3.5873544093178036, "grad_norm": 11.887737274169922, "learning_rate": 5e-06, "loss": 0.7184, "num_input_tokens_seen": 67559900, "step": 1078 }, { "epoch": 3.5873544093178036, "loss": 0.8478894233703613, "loss_ce": 0.0009656131733208895, "loss_iou": 0.248046875, "loss_num": 0.0703125, "loss_xval": 0.84765625, "num_input_tokens_seen": 67559900, "step": 1078 }, { "epoch": 3.590682196339434, "grad_norm": 32.69264602661133, "learning_rate": 5e-06, "loss": 0.7889, "num_input_tokens_seen": 67622724, "step": 1079 }, { "epoch": 3.590682196339434, "loss": 0.760546863079071, "loss_ce": 4.88169098389335e-05, "loss_iou": 0.259765625, "loss_num": 0.04833984375, "loss_xval": 0.76171875, "num_input_tokens_seen": 67622724, "step": 1079 }, { "epoch": 3.594009983361065, "grad_norm": 15.543495178222656, "learning_rate": 5e-06, "loss": 0.6654, "num_input_tokens_seen": 67683704, "step": 1080 }, { "epoch": 3.594009983361065, "loss": 0.6395562291145325, "loss_ce": 2.9867431294405833e-05, "loss_iou": 0.1640625, "loss_num": 0.06201171875, "loss_xval": 0.640625, "num_input_tokens_seen": 67683704, "step": 1080 }, { "epoch": 3.5973377703826954, "grad_norm": 22.80522918701172, "learning_rate": 5e-06, "loss": 0.8463, "num_input_tokens_seen": 67746952, "step": 1081 }, { "epoch": 3.5973377703826954, "loss": 0.8648264408111572, "loss_ce": 0.000446604797616601, "loss_iou": 0.30078125, "loss_num": 0.052490234375, "loss_xval": 0.86328125, "num_input_tokens_seen": 67746952, "step": 1081 }, { "epoch": 3.600665557404326, "grad_norm": 18.392467498779297, "learning_rate": 5e-06, "loss": 0.6663, "num_input_tokens_seen": 67809764, "step": 1082 }, { "epoch": 3.600665557404326, "loss": 0.741300106048584, "loss_ce": 0.0014319655019789934, "loss_iou": 0.271484375, "loss_num": 0.039306640625, "loss_xval": 0.73828125, "num_input_tokens_seen": 67809764, "step": 1082 }, { "epoch": 3.6039933444259566, "grad_norm": 10.33826732635498, "learning_rate": 5e-06, "loss": 0.6008, "num_input_tokens_seen": 67871348, "step": 1083 }, { "epoch": 3.6039933444259566, "loss": 0.4612269103527069, "loss_ce": 4.5263179345056415e-05, "loss_iou": 0.11865234375, "loss_num": 0.044677734375, "loss_xval": 0.4609375, "num_input_tokens_seen": 67871348, "step": 1083 }, { "epoch": 3.6073211314475873, "grad_norm": 26.75817108154297, "learning_rate": 5e-06, "loss": 0.7098, "num_input_tokens_seen": 67934820, "step": 1084 }, { "epoch": 3.6073211314475873, "loss": 0.7330468893051147, "loss_ce": 1.4662471585324965e-05, "loss_iou": 0.279296875, "loss_num": 0.03515625, "loss_xval": 0.734375, "num_input_tokens_seen": 67934820, "step": 1084 }, { "epoch": 3.610648918469218, "grad_norm": 14.098214149475098, "learning_rate": 5e-06, "loss": 0.7828, "num_input_tokens_seen": 67996592, "step": 1085 }, { "epoch": 3.610648918469218, "loss": 0.6801908016204834, "loss_ce": 1.5059520592330955e-05, "loss_iou": 0.228515625, "loss_num": 0.044677734375, "loss_xval": 0.6796875, "num_input_tokens_seen": 67996592, "step": 1085 }, { "epoch": 3.6139767054908485, "grad_norm": 11.56197452545166, "learning_rate": 5e-06, "loss": 0.7931, "num_input_tokens_seen": 68060428, "step": 1086 }, { "epoch": 3.6139767054908485, "loss": 0.7243208885192871, "loss_ce": 0.0006881133886054158, "loss_iou": 0.2236328125, "loss_num": 0.05517578125, "loss_xval": 0.72265625, "num_input_tokens_seen": 68060428, "step": 1086 }, { "epoch": 3.617304492512479, "grad_norm": 9.20474910736084, "learning_rate": 5e-06, "loss": 0.7987, "num_input_tokens_seen": 68120428, "step": 1087 }, { "epoch": 3.617304492512479, "loss": 0.9417086243629456, "loss_ce": 0.00079060293501243, "loss_iou": 0.28515625, "loss_num": 0.07421875, "loss_xval": 0.94140625, "num_input_tokens_seen": 68120428, "step": 1087 }, { "epoch": 3.6206322795341097, "grad_norm": 23.553590774536133, "learning_rate": 5e-06, "loss": 0.7281, "num_input_tokens_seen": 68183092, "step": 1088 }, { "epoch": 3.6206322795341097, "loss": 0.6970406770706177, "loss_ce": 1.9182030882802792e-05, "loss_iou": 0.177734375, "loss_num": 0.068359375, "loss_xval": 0.6953125, "num_input_tokens_seen": 68183092, "step": 1088 }, { "epoch": 3.6239600665557403, "grad_norm": 16.959150314331055, "learning_rate": 5e-06, "loss": 0.6518, "num_input_tokens_seen": 68245676, "step": 1089 }, { "epoch": 3.6239600665557403, "loss": 0.6579831838607788, "loss_ce": 2.4194079742301255e-05, "loss_iou": 0.2197265625, "loss_num": 0.04345703125, "loss_xval": 0.65625, "num_input_tokens_seen": 68245676, "step": 1089 }, { "epoch": 3.627287853577371, "grad_norm": 47.85329055786133, "learning_rate": 5e-06, "loss": 0.7832, "num_input_tokens_seen": 68308340, "step": 1090 }, { "epoch": 3.627287853577371, "loss": 0.8862950801849365, "loss_ce": 0.00030875191441737115, "loss_iou": 0.328125, "loss_num": 0.0458984375, "loss_xval": 0.88671875, "num_input_tokens_seen": 68308340, "step": 1090 }, { "epoch": 3.6306156405990015, "grad_norm": 13.920936584472656, "learning_rate": 5e-06, "loss": 0.7249, "num_input_tokens_seen": 68370692, "step": 1091 }, { "epoch": 3.6306156405990015, "loss": 0.8974728584289551, "loss_ce": 0.0002561031433288008, "loss_iou": 0.33203125, "loss_num": 0.047119140625, "loss_xval": 0.8984375, "num_input_tokens_seen": 68370692, "step": 1091 }, { "epoch": 3.633943427620632, "grad_norm": 9.428908348083496, "learning_rate": 5e-06, "loss": 0.6884, "num_input_tokens_seen": 68432480, "step": 1092 }, { "epoch": 3.633943427620632, "loss": 0.6953365802764893, "loss_ce": 2.4052456865319982e-05, "loss_iou": 0.232421875, "loss_num": 0.04638671875, "loss_xval": 0.6953125, "num_input_tokens_seen": 68432480, "step": 1092 }, { "epoch": 3.6372712146422628, "grad_norm": 15.467686653137207, "learning_rate": 5e-06, "loss": 0.6283, "num_input_tokens_seen": 68494344, "step": 1093 }, { "epoch": 3.6372712146422628, "loss": 0.839697539806366, "loss_ce": 0.0005862598773092031, "loss_iou": 0.333984375, "loss_num": 0.033935546875, "loss_xval": 0.83984375, "num_input_tokens_seen": 68494344, "step": 1093 }, { "epoch": 3.6405990016638934, "grad_norm": 13.4644775390625, "learning_rate": 5e-06, "loss": 0.9332, "num_input_tokens_seen": 68556756, "step": 1094 }, { "epoch": 3.6405990016638934, "loss": 1.0598011016845703, "loss_ce": 0.00023084873100742698, "loss_iou": 0.365234375, "loss_num": 0.06591796875, "loss_xval": 1.0625, "num_input_tokens_seen": 68556756, "step": 1094 }, { "epoch": 3.643926788685524, "grad_norm": 7.175938129425049, "learning_rate": 5e-06, "loss": 0.7075, "num_input_tokens_seen": 68616224, "step": 1095 }, { "epoch": 3.643926788685524, "loss": 0.7629657983779907, "loss_ce": 2.6325005819671787e-05, "loss_iou": 0.2373046875, "loss_num": 0.057373046875, "loss_xval": 0.76171875, "num_input_tokens_seen": 68616224, "step": 1095 }, { "epoch": 3.6472545757071546, "grad_norm": 18.110021591186523, "learning_rate": 5e-06, "loss": 0.7513, "num_input_tokens_seen": 68679384, "step": 1096 }, { "epoch": 3.6472545757071546, "loss": 0.542603611946106, "loss_ce": 0.00012313880142755806, "loss_iou": 0.1650390625, "loss_num": 0.04248046875, "loss_xval": 0.54296875, "num_input_tokens_seen": 68679384, "step": 1096 }, { "epoch": 3.6505823627287852, "grad_norm": 24.87841033935547, "learning_rate": 5e-06, "loss": 0.897, "num_input_tokens_seen": 68740984, "step": 1097 }, { "epoch": 3.6505823627287852, "loss": 0.7363914251327515, "loss_ce": 0.0005515533266589046, "loss_iou": 0.28125, "loss_num": 0.03466796875, "loss_xval": 0.734375, "num_input_tokens_seen": 68740984, "step": 1097 }, { "epoch": 3.653910149750416, "grad_norm": 14.83497428894043, "learning_rate": 5e-06, "loss": 0.4878, "num_input_tokens_seen": 68803040, "step": 1098 }, { "epoch": 3.653910149750416, "loss": 0.4231252670288086, "loss_ce": 2.955349555122666e-05, "loss_iou": 0.1171875, "loss_num": 0.037841796875, "loss_xval": 0.423828125, "num_input_tokens_seen": 68803040, "step": 1098 }, { "epoch": 3.6572379367720464, "grad_norm": 10.35799503326416, "learning_rate": 5e-06, "loss": 0.7078, "num_input_tokens_seen": 68866936, "step": 1099 }, { "epoch": 3.6572379367720464, "loss": 0.56097412109375, "loss_ce": 0.0014343142975121737, "loss_iou": 0.173828125, "loss_num": 0.04248046875, "loss_xval": 0.55859375, "num_input_tokens_seen": 68866936, "step": 1099 }, { "epoch": 3.660565723793677, "grad_norm": 20.12316131591797, "learning_rate": 5e-06, "loss": 0.6702, "num_input_tokens_seen": 68927524, "step": 1100 }, { "epoch": 3.660565723793677, "loss": 0.9203138947486877, "loss_ce": 0.0006362266722135246, "loss_iou": 0.306640625, "loss_num": 0.06103515625, "loss_xval": 0.91796875, "num_input_tokens_seen": 68927524, "step": 1100 }, { "epoch": 3.6638935108153077, "grad_norm": 22.879741668701172, "learning_rate": 5e-06, "loss": 0.8751, "num_input_tokens_seen": 68989620, "step": 1101 }, { "epoch": 3.6638935108153077, "loss": 0.8814558982849121, "loss_ce": 0.017686322331428528, "loss_iou": 0.333984375, "loss_num": 0.039306640625, "loss_xval": 0.86328125, "num_input_tokens_seen": 68989620, "step": 1101 }, { "epoch": 3.6672212978369383, "grad_norm": 13.441774368286133, "learning_rate": 5e-06, "loss": 0.7847, "num_input_tokens_seen": 69051672, "step": 1102 }, { "epoch": 3.6672212978369383, "loss": 0.9437904357910156, "loss_ce": 0.0003089719684794545, "loss_iou": 0.33984375, "loss_num": 0.052978515625, "loss_xval": 0.9453125, "num_input_tokens_seen": 69051672, "step": 1102 }, { "epoch": 3.670549084858569, "grad_norm": 15.477404594421387, "learning_rate": 5e-06, "loss": 0.7314, "num_input_tokens_seen": 69115388, "step": 1103 }, { "epoch": 3.670549084858569, "loss": 0.6018927097320557, "loss_ce": 0.0008184734033420682, "loss_iou": 0.2138671875, "loss_num": 0.03466796875, "loss_xval": 0.6015625, "num_input_tokens_seen": 69115388, "step": 1103 }, { "epoch": 3.6738768718801995, "grad_norm": 19.463781356811523, "learning_rate": 5e-06, "loss": 0.7772, "num_input_tokens_seen": 69178392, "step": 1104 }, { "epoch": 3.6738768718801995, "loss": 0.7169560194015503, "loss_ce": 0.00028123901574872434, "loss_iou": 0.1845703125, "loss_num": 0.0693359375, "loss_xval": 0.71484375, "num_input_tokens_seen": 69178392, "step": 1104 }, { "epoch": 3.67720465890183, "grad_norm": 7.575782775878906, "learning_rate": 5e-06, "loss": 0.5294, "num_input_tokens_seen": 69239676, "step": 1105 }, { "epoch": 3.67720465890183, "loss": 0.531362771987915, "loss_ce": 0.0006010084762237966, "loss_iou": 0.142578125, "loss_num": 0.049072265625, "loss_xval": 0.53125, "num_input_tokens_seen": 69239676, "step": 1105 }, { "epoch": 3.6805324459234607, "grad_norm": 14.393789291381836, "learning_rate": 5e-06, "loss": 0.7182, "num_input_tokens_seen": 69301396, "step": 1106 }, { "epoch": 3.6805324459234607, "loss": 0.6370840072631836, "loss_ce": 0.0003652620653156191, "loss_iou": 0.177734375, "loss_num": 0.056396484375, "loss_xval": 0.63671875, "num_input_tokens_seen": 69301396, "step": 1106 }, { "epoch": 3.6838602329450914, "grad_norm": 11.867156028747559, "learning_rate": 5e-06, "loss": 0.9087, "num_input_tokens_seen": 69362984, "step": 1107 }, { "epoch": 3.6838602329450914, "loss": 0.5029775500297546, "loss_ce": 0.00029203054145909846, "loss_iou": 0.1015625, "loss_num": 0.059814453125, "loss_xval": 0.50390625, "num_input_tokens_seen": 69362984, "step": 1107 }, { "epoch": 3.687188019966722, "grad_norm": 14.382938385009766, "learning_rate": 5e-06, "loss": 0.7059, "num_input_tokens_seen": 69425984, "step": 1108 }, { "epoch": 3.687188019966722, "loss": 0.7315462827682495, "loss_ce": 0.00034508475800976157, "loss_iou": 0.201171875, "loss_num": 0.06591796875, "loss_xval": 0.73046875, "num_input_tokens_seen": 69425984, "step": 1108 }, { "epoch": 3.690515806988353, "grad_norm": 21.43218994140625, "learning_rate": 5e-06, "loss": 0.7946, "num_input_tokens_seen": 69489740, "step": 1109 }, { "epoch": 3.690515806988353, "loss": 0.7349926829338074, "loss_ce": 0.0006176835740916431, "loss_iou": 0.2060546875, "loss_num": 0.064453125, "loss_xval": 0.734375, "num_input_tokens_seen": 69489740, "step": 1109 }, { "epoch": 3.6938435940099836, "grad_norm": 16.769193649291992, "learning_rate": 5e-06, "loss": 0.6175, "num_input_tokens_seen": 69550704, "step": 1110 }, { "epoch": 3.6938435940099836, "loss": 0.634501576423645, "loss_ce": 0.0007125326665118337, "loss_iou": 0.17578125, "loss_num": 0.056640625, "loss_xval": 0.6328125, "num_input_tokens_seen": 69550704, "step": 1110 }, { "epoch": 3.6971713810316142, "grad_norm": 32.770267486572266, "learning_rate": 5e-06, "loss": 0.9464, "num_input_tokens_seen": 69614880, "step": 1111 }, { "epoch": 3.6971713810316142, "loss": 1.0868799686431885, "loss_ce": 0.0009425695170648396, "loss_iou": 0.390625, "loss_num": 0.061279296875, "loss_xval": 1.0859375, "num_input_tokens_seen": 69614880, "step": 1111 }, { "epoch": 3.700499168053245, "grad_norm": 43.10527420043945, "learning_rate": 5e-06, "loss": 0.9164, "num_input_tokens_seen": 69677600, "step": 1112 }, { "epoch": 3.700499168053245, "loss": 0.7824146151542664, "loss_ce": 4.9382551878807135e-06, "loss_iou": 0.259765625, "loss_num": 0.05224609375, "loss_xval": 0.78125, "num_input_tokens_seen": 69677600, "step": 1112 }, { "epoch": 3.7038269550748755, "grad_norm": 22.692411422729492, "learning_rate": 5e-06, "loss": 0.974, "num_input_tokens_seen": 69739600, "step": 1113 }, { "epoch": 3.7038269550748755, "loss": 0.9298710823059082, "loss_ce": 0.0009159321198239923, "loss_iou": 0.283203125, "loss_num": 0.072265625, "loss_xval": 0.9296875, "num_input_tokens_seen": 69739600, "step": 1113 }, { "epoch": 3.707154742096506, "grad_norm": 10.36915111541748, "learning_rate": 5e-06, "loss": 0.6903, "num_input_tokens_seen": 69800996, "step": 1114 }, { "epoch": 3.707154742096506, "loss": 0.6277588605880737, "loss_ce": 0.000439511495642364, "loss_iou": 0.2119140625, "loss_num": 0.041015625, "loss_xval": 0.62890625, "num_input_tokens_seen": 69800996, "step": 1114 }, { "epoch": 3.7104825291181367, "grad_norm": 15.28381061553955, "learning_rate": 5e-06, "loss": 0.8741, "num_input_tokens_seen": 69863736, "step": 1115 }, { "epoch": 3.7104825291181367, "loss": 0.9558818340301514, "loss_ce": 0.0011699418537318707, "loss_iou": 0.3125, "loss_num": 0.0654296875, "loss_xval": 0.953125, "num_input_tokens_seen": 69863736, "step": 1115 }, { "epoch": 3.7138103161397673, "grad_norm": 11.837628364562988, "learning_rate": 5e-06, "loss": 0.8104, "num_input_tokens_seen": 69927776, "step": 1116 }, { "epoch": 3.7138103161397673, "loss": 0.8550588488578796, "loss_ce": 0.0003225263499189168, "loss_iou": 0.302734375, "loss_num": 0.049560546875, "loss_xval": 0.85546875, "num_input_tokens_seen": 69927776, "step": 1116 }, { "epoch": 3.717138103161398, "grad_norm": 20.32373046875, "learning_rate": 5e-06, "loss": 0.8018, "num_input_tokens_seen": 69991752, "step": 1117 }, { "epoch": 3.717138103161398, "loss": 0.7404411435127258, "loss_ce": 0.0006950714159756899, "loss_iou": 0.2734375, "loss_num": 0.038330078125, "loss_xval": 0.73828125, "num_input_tokens_seen": 69991752, "step": 1117 }, { "epoch": 3.7204658901830285, "grad_norm": 14.117117881774902, "learning_rate": 5e-06, "loss": 0.7821, "num_input_tokens_seen": 70055220, "step": 1118 }, { "epoch": 3.7204658901830285, "loss": 1.1473777294158936, "loss_ce": 0.0010153691982850432, "loss_iou": 0.42578125, "loss_num": 0.058837890625, "loss_xval": 1.1484375, "num_input_tokens_seen": 70055220, "step": 1118 }, { "epoch": 3.723793677204659, "grad_norm": 11.328316688537598, "learning_rate": 5e-06, "loss": 0.6542, "num_input_tokens_seen": 70117140, "step": 1119 }, { "epoch": 3.723793677204659, "loss": 0.5794090032577515, "loss_ce": 0.00030745528056286275, "loss_iou": 0.1748046875, "loss_num": 0.0458984375, "loss_xval": 0.578125, "num_input_tokens_seen": 70117140, "step": 1119 }, { "epoch": 3.7271214642262898, "grad_norm": 9.209413528442383, "learning_rate": 5e-06, "loss": 0.6797, "num_input_tokens_seen": 70179304, "step": 1120 }, { "epoch": 3.7271214642262898, "loss": 0.6281410455703735, "loss_ce": 0.0010658506071195006, "loss_iou": 0.1904296875, "loss_num": 0.049072265625, "loss_xval": 0.62890625, "num_input_tokens_seen": 70179304, "step": 1120 }, { "epoch": 3.7304492512479204, "grad_norm": 12.064332008361816, "learning_rate": 5e-06, "loss": 0.682, "num_input_tokens_seen": 70242640, "step": 1121 }, { "epoch": 3.7304492512479204, "loss": 0.6883260011672974, "loss_ce": 0.00033775315387174487, "loss_iou": 0.2431640625, "loss_num": 0.04052734375, "loss_xval": 0.6875, "num_input_tokens_seen": 70242640, "step": 1121 }, { "epoch": 3.733777038269551, "grad_norm": 21.927749633789062, "learning_rate": 5e-06, "loss": 0.9156, "num_input_tokens_seen": 70306468, "step": 1122 }, { "epoch": 3.733777038269551, "loss": 0.9951090216636658, "loss_ce": 0.00048012335901148617, "loss_iou": 0.3515625, "loss_num": 0.058349609375, "loss_xval": 0.99609375, "num_input_tokens_seen": 70306468, "step": 1122 }, { "epoch": 3.7371048252911816, "grad_norm": 43.249393463134766, "learning_rate": 5e-06, "loss": 1.0435, "num_input_tokens_seen": 70369504, "step": 1123 }, { "epoch": 3.7371048252911816, "loss": 1.0370984077453613, "loss_ce": 0.0014538050163537264, "loss_iou": 0.306640625, "loss_num": 0.08447265625, "loss_xval": 1.0390625, "num_input_tokens_seen": 70369504, "step": 1123 }, { "epoch": 3.740432612312812, "grad_norm": 17.981069564819336, "learning_rate": 5e-06, "loss": 0.6572, "num_input_tokens_seen": 70432836, "step": 1124 }, { "epoch": 3.740432612312812, "loss": 0.7932811975479126, "loss_ce": 6.828659388702363e-05, "loss_iou": 0.2734375, "loss_num": 0.049072265625, "loss_xval": 0.79296875, "num_input_tokens_seen": 70432836, "step": 1124 }, { "epoch": 3.743760399334443, "grad_norm": 13.31531047821045, "learning_rate": 5e-06, "loss": 0.7378, "num_input_tokens_seen": 70495112, "step": 1125 }, { "epoch": 3.743760399334443, "loss": 0.9534047245979309, "loss_ce": 0.001012163469567895, "loss_iou": 0.357421875, "loss_num": 0.04736328125, "loss_xval": 0.953125, "num_input_tokens_seen": 70495112, "step": 1125 }, { "epoch": 3.7470881863560734, "grad_norm": 13.23071002960205, "learning_rate": 5e-06, "loss": 1.0306, "num_input_tokens_seen": 70557332, "step": 1126 }, { "epoch": 3.7470881863560734, "loss": 1.243412971496582, "loss_ce": 0.0004931364674121141, "loss_iou": 0.4375, "loss_num": 0.07373046875, "loss_xval": 1.2421875, "num_input_tokens_seen": 70557332, "step": 1126 }, { "epoch": 3.750415973377704, "grad_norm": 11.754276275634766, "learning_rate": 5e-06, "loss": 0.7345, "num_input_tokens_seen": 70619872, "step": 1127 }, { "epoch": 3.750415973377704, "loss": 0.9770193099975586, "loss_ce": 0.0011892361799255013, "loss_iou": 0.310546875, "loss_num": 0.0703125, "loss_xval": 0.9765625, "num_input_tokens_seen": 70619872, "step": 1127 }, { "epoch": 3.7537437603993347, "grad_norm": 16.051631927490234, "learning_rate": 5e-06, "loss": 1.002, "num_input_tokens_seen": 70681060, "step": 1128 }, { "epoch": 3.7537437603993347, "loss": 1.000293493270874, "loss_ce": 0.002002517692744732, "loss_iou": 0.3046875, "loss_num": 0.07763671875, "loss_xval": 1.0, "num_input_tokens_seen": 70681060, "step": 1128 }, { "epoch": 3.7570715474209653, "grad_norm": 12.550768852233887, "learning_rate": 5e-06, "loss": 0.7239, "num_input_tokens_seen": 70742176, "step": 1129 }, { "epoch": 3.7570715474209653, "loss": 0.8635753393173218, "loss_ce": 0.000538200605660677, "loss_iou": 0.29296875, "loss_num": 0.05517578125, "loss_xval": 0.86328125, "num_input_tokens_seen": 70742176, "step": 1129 }, { "epoch": 3.760399334442596, "grad_norm": 14.114124298095703, "learning_rate": 5e-06, "loss": 0.6735, "num_input_tokens_seen": 70804608, "step": 1130 }, { "epoch": 3.760399334442596, "loss": 0.4799818992614746, "loss_ce": 0.0007338316063396633, "loss_iou": 0.1591796875, "loss_num": 0.031982421875, "loss_xval": 0.478515625, "num_input_tokens_seen": 70804608, "step": 1130 }, { "epoch": 3.7637271214642265, "grad_norm": 21.7076473236084, "learning_rate": 5e-06, "loss": 0.7799, "num_input_tokens_seen": 70867088, "step": 1131 }, { "epoch": 3.7637271214642265, "loss": 0.4688870310783386, "loss_ce": 0.00016756025434006006, "loss_iou": 0.1640625, "loss_num": 0.0281982421875, "loss_xval": 0.46875, "num_input_tokens_seen": 70867088, "step": 1131 }, { "epoch": 3.767054908485857, "grad_norm": 10.689167022705078, "learning_rate": 5e-06, "loss": 0.6789, "num_input_tokens_seen": 70928940, "step": 1132 }, { "epoch": 3.767054908485857, "loss": 0.9793890714645386, "loss_ce": 0.0003851721412502229, "loss_iou": 0.373046875, "loss_num": 0.046875, "loss_xval": 0.98046875, "num_input_tokens_seen": 70928940, "step": 1132 }, { "epoch": 3.7703826955074877, "grad_norm": 18.24936866760254, "learning_rate": 5e-06, "loss": 0.8163, "num_input_tokens_seen": 70989924, "step": 1133 }, { "epoch": 3.7703826955074877, "loss": 0.7339310646057129, "loss_ce": 4.4314670958556235e-05, "loss_iou": 0.166015625, "loss_num": 0.08056640625, "loss_xval": 0.734375, "num_input_tokens_seen": 70989924, "step": 1133 }, { "epoch": 3.7737104825291183, "grad_norm": 22.505451202392578, "learning_rate": 5e-06, "loss": 0.5424, "num_input_tokens_seen": 71051320, "step": 1134 }, { "epoch": 3.7737104825291183, "loss": 0.7140070199966431, "loss_ce": 0.0006280855741351843, "loss_iou": 0.255859375, "loss_num": 0.040771484375, "loss_xval": 0.71484375, "num_input_tokens_seen": 71051320, "step": 1134 }, { "epoch": 3.777038269550749, "grad_norm": 8.693553924560547, "learning_rate": 5e-06, "loss": 0.5119, "num_input_tokens_seen": 71112332, "step": 1135 }, { "epoch": 3.777038269550749, "loss": 0.5159156322479248, "loss_ce": 3.124218710581772e-05, "loss_iou": 0.19140625, "loss_num": 0.0264892578125, "loss_xval": 0.515625, "num_input_tokens_seen": 71112332, "step": 1135 }, { "epoch": 3.7803660565723796, "grad_norm": 12.785008430480957, "learning_rate": 5e-06, "loss": 0.6925, "num_input_tokens_seen": 71175924, "step": 1136 }, { "epoch": 3.7803660565723796, "loss": 0.6734526753425598, "loss_ce": 0.0007231989875435829, "loss_iou": 0.2373046875, "loss_num": 0.03955078125, "loss_xval": 0.671875, "num_input_tokens_seen": 71175924, "step": 1136 }, { "epoch": 3.78369384359401, "grad_norm": 11.817228317260742, "learning_rate": 5e-06, "loss": 0.9368, "num_input_tokens_seen": 71238860, "step": 1137 }, { "epoch": 3.78369384359401, "loss": 0.8826572895050049, "loss_ce": 0.00021094588737469167, "loss_iou": 0.271484375, "loss_num": 0.06787109375, "loss_xval": 0.8828125, "num_input_tokens_seen": 71238860, "step": 1137 }, { "epoch": 3.787021630615641, "grad_norm": 13.229910850524902, "learning_rate": 5e-06, "loss": 0.6284, "num_input_tokens_seen": 71301228, "step": 1138 }, { "epoch": 3.787021630615641, "loss": 0.7485630512237549, "loss_ce": 0.0002720615011639893, "loss_iou": 0.271484375, "loss_num": 0.041015625, "loss_xval": 0.75, "num_input_tokens_seen": 71301228, "step": 1138 }, { "epoch": 3.7903494176372714, "grad_norm": 39.25370788574219, "learning_rate": 5e-06, "loss": 0.7781, "num_input_tokens_seen": 71364208, "step": 1139 }, { "epoch": 3.7903494176372714, "loss": 0.860191822052002, "loss_ce": 8.443903061561286e-05, "loss_iou": 0.314453125, "loss_num": 0.04638671875, "loss_xval": 0.859375, "num_input_tokens_seen": 71364208, "step": 1139 }, { "epoch": 3.793677204658902, "grad_norm": 8.552867889404297, "learning_rate": 5e-06, "loss": 0.6363, "num_input_tokens_seen": 71425864, "step": 1140 }, { "epoch": 3.793677204658902, "loss": 0.6165646314620972, "loss_ce": 0.000842000765260309, "loss_iou": 0.140625, "loss_num": 0.06689453125, "loss_xval": 0.6171875, "num_input_tokens_seen": 71425864, "step": 1140 }, { "epoch": 3.7970049916805326, "grad_norm": 7.005518436431885, "learning_rate": 5e-06, "loss": 0.7277, "num_input_tokens_seen": 71489608, "step": 1141 }, { "epoch": 3.7970049916805326, "loss": 0.7442701458930969, "loss_ce": 0.0013502332149073482, "loss_iou": 0.248046875, "loss_num": 0.049072265625, "loss_xval": 0.7421875, "num_input_tokens_seen": 71489608, "step": 1141 }, { "epoch": 3.8003327787021632, "grad_norm": 7.645864486694336, "learning_rate": 5e-06, "loss": 0.6064, "num_input_tokens_seen": 71551324, "step": 1142 }, { "epoch": 3.8003327787021632, "loss": 0.48682481050491333, "loss_ce": 0.00025257206289097667, "loss_iou": 0.1767578125, "loss_num": 0.0267333984375, "loss_xval": 0.486328125, "num_input_tokens_seen": 71551324, "step": 1142 }, { "epoch": 3.803660565723794, "grad_norm": 11.24929141998291, "learning_rate": 5e-06, "loss": 0.6623, "num_input_tokens_seen": 71614812, "step": 1143 }, { "epoch": 3.803660565723794, "loss": 0.5440496206283569, "loss_ce": 4.3287254811730236e-05, "loss_iou": 0.2021484375, "loss_num": 0.028076171875, "loss_xval": 0.54296875, "num_input_tokens_seen": 71614812, "step": 1143 }, { "epoch": 3.8069883527454245, "grad_norm": 14.913579940795898, "learning_rate": 5e-06, "loss": 1.1182, "num_input_tokens_seen": 71678540, "step": 1144 }, { "epoch": 3.8069883527454245, "loss": 0.9577356576919556, "loss_ce": 0.0007043948862701654, "loss_iou": 0.33203125, "loss_num": 0.058349609375, "loss_xval": 0.95703125, "num_input_tokens_seen": 71678540, "step": 1144 }, { "epoch": 3.810316139767055, "grad_norm": 9.077905654907227, "learning_rate": 5e-06, "loss": 0.6147, "num_input_tokens_seen": 71739620, "step": 1145 }, { "epoch": 3.810316139767055, "loss": 0.5191451907157898, "loss_ce": 4.1206356399925426e-05, "loss_iou": 0.1357421875, "loss_num": 0.049560546875, "loss_xval": 0.51953125, "num_input_tokens_seen": 71739620, "step": 1145 }, { "epoch": 3.8136439267886857, "grad_norm": 19.119752883911133, "learning_rate": 5e-06, "loss": 0.6505, "num_input_tokens_seen": 71801800, "step": 1146 }, { "epoch": 3.8136439267886857, "loss": 0.6102641820907593, "loss_ce": 0.0006450038054026663, "loss_iou": 0.1884765625, "loss_num": 0.04638671875, "loss_xval": 0.609375, "num_input_tokens_seen": 71801800, "step": 1146 }, { "epoch": 3.8169717138103163, "grad_norm": 24.023101806640625, "learning_rate": 5e-06, "loss": 0.6411, "num_input_tokens_seen": 71866032, "step": 1147 }, { "epoch": 3.8169717138103163, "loss": 0.600353479385376, "loss_ce": 1.1719953363353852e-05, "loss_iou": 0.2265625, "loss_num": 0.0294189453125, "loss_xval": 0.6015625, "num_input_tokens_seen": 71866032, "step": 1147 }, { "epoch": 3.820299500831947, "grad_norm": 11.998858451843262, "learning_rate": 5e-06, "loss": 0.7952, "num_input_tokens_seen": 71926820, "step": 1148 }, { "epoch": 3.820299500831947, "loss": 0.8667431473731995, "loss_ce": 0.001142566092312336, "loss_iou": 0.310546875, "loss_num": 0.048583984375, "loss_xval": 0.8671875, "num_input_tokens_seen": 71926820, "step": 1148 }, { "epoch": 3.8236272878535775, "grad_norm": 23.826696395874023, "learning_rate": 5e-06, "loss": 0.6687, "num_input_tokens_seen": 71989184, "step": 1149 }, { "epoch": 3.8236272878535775, "loss": 0.6472935080528259, "loss_ce": 0.0059361206367611885, "loss_iou": 0.1533203125, "loss_num": 0.0673828125, "loss_xval": 0.640625, "num_input_tokens_seen": 71989184, "step": 1149 }, { "epoch": 3.826955074875208, "grad_norm": 15.457709312438965, "learning_rate": 5e-06, "loss": 0.6533, "num_input_tokens_seen": 72052768, "step": 1150 }, { "epoch": 3.826955074875208, "loss": 0.8420194983482361, "loss_ce": 0.0013212742051109672, "loss_iou": 0.30078125, "loss_num": 0.04736328125, "loss_xval": 0.83984375, "num_input_tokens_seen": 72052768, "step": 1150 }, { "epoch": 3.8302828618968388, "grad_norm": 11.85561466217041, "learning_rate": 5e-06, "loss": 0.5567, "num_input_tokens_seen": 72115176, "step": 1151 }, { "epoch": 3.8302828618968388, "loss": 0.491475909948349, "loss_ce": 0.00014292128616943955, "loss_iou": 0.1455078125, "loss_num": 0.0400390625, "loss_xval": 0.4921875, "num_input_tokens_seen": 72115176, "step": 1151 }, { "epoch": 3.8336106489184694, "grad_norm": 14.689626693725586, "learning_rate": 5e-06, "loss": 0.7026, "num_input_tokens_seen": 72176716, "step": 1152 }, { "epoch": 3.8336106489184694, "loss": 0.9168117046356201, "loss_ce": 0.00030778077780269086, "loss_iou": 0.310546875, "loss_num": 0.05908203125, "loss_xval": 0.91796875, "num_input_tokens_seen": 72176716, "step": 1152 }, { "epoch": 3.8369384359401, "grad_norm": 13.6842679977417, "learning_rate": 5e-06, "loss": 0.9622, "num_input_tokens_seen": 72239940, "step": 1153 }, { "epoch": 3.8369384359401, "loss": 0.9152591824531555, "loss_ce": 0.0009526039939373732, "loss_iou": 0.314453125, "loss_num": 0.057373046875, "loss_xval": 0.9140625, "num_input_tokens_seen": 72239940, "step": 1153 }, { "epoch": 3.8402662229617306, "grad_norm": 11.269613265991211, "learning_rate": 5e-06, "loss": 0.7609, "num_input_tokens_seen": 72301720, "step": 1154 }, { "epoch": 3.8402662229617306, "loss": 0.6302889585494995, "loss_ce": 0.001016515656374395, "loss_iou": 0.22265625, "loss_num": 0.037109375, "loss_xval": 0.62890625, "num_input_tokens_seen": 72301720, "step": 1154 }, { "epoch": 3.843594009983361, "grad_norm": 21.826889038085938, "learning_rate": 5e-06, "loss": 1.0018, "num_input_tokens_seen": 72363092, "step": 1155 }, { "epoch": 3.843594009983361, "loss": 1.0488756895065308, "loss_ce": 0.0009020923171192408, "loss_iou": 0.34375, "loss_num": 0.072265625, "loss_xval": 1.046875, "num_input_tokens_seen": 72363092, "step": 1155 }, { "epoch": 3.846921797004992, "grad_norm": 27.468761444091797, "learning_rate": 5e-06, "loss": 0.9151, "num_input_tokens_seen": 72427664, "step": 1156 }, { "epoch": 3.846921797004992, "loss": 0.9939867258071899, "loss_ce": 0.0002123451413353905, "loss_iou": 0.38671875, "loss_num": 0.0439453125, "loss_xval": 0.9921875, "num_input_tokens_seen": 72427664, "step": 1156 }, { "epoch": 3.8502495840266224, "grad_norm": 20.527379989624023, "learning_rate": 5e-06, "loss": 0.9454, "num_input_tokens_seen": 72492032, "step": 1157 }, { "epoch": 3.8502495840266224, "loss": 0.8400707244873047, "loss_ce": 0.0010204899590462446, "loss_iou": 0.279296875, "loss_num": 0.055908203125, "loss_xval": 0.83984375, "num_input_tokens_seen": 72492032, "step": 1157 }, { "epoch": 3.853577371048253, "grad_norm": 8.549810409545898, "learning_rate": 5e-06, "loss": 0.7699, "num_input_tokens_seen": 72554224, "step": 1158 }, { "epoch": 3.853577371048253, "loss": 0.627190351486206, "loss_ce": 0.0021292981691658497, "loss_iou": 0.173828125, "loss_num": 0.055419921875, "loss_xval": 0.625, "num_input_tokens_seen": 72554224, "step": 1158 }, { "epoch": 3.8569051580698837, "grad_norm": 21.32713508605957, "learning_rate": 5e-06, "loss": 0.6667, "num_input_tokens_seen": 72617400, "step": 1159 }, { "epoch": 3.8569051580698837, "loss": 0.8027868270874023, "loss_ce": 5.2467978093773127e-05, "loss_iou": 0.265625, "loss_num": 0.05419921875, "loss_xval": 0.8046875, "num_input_tokens_seen": 72617400, "step": 1159 }, { "epoch": 3.8602329450915143, "grad_norm": 12.14395809173584, "learning_rate": 5e-06, "loss": 0.7003, "num_input_tokens_seen": 72679172, "step": 1160 }, { "epoch": 3.8602329450915143, "loss": 0.6025677919387817, "loss_ce": 2.867916555260308e-05, "loss_iou": 0.2236328125, "loss_num": 0.0308837890625, "loss_xval": 0.6015625, "num_input_tokens_seen": 72679172, "step": 1160 }, { "epoch": 3.863560732113145, "grad_norm": 19.726367950439453, "learning_rate": 5e-06, "loss": 0.6465, "num_input_tokens_seen": 72742260, "step": 1161 }, { "epoch": 3.863560732113145, "loss": 0.6337836980819702, "loss_ce": 0.00011673450353555381, "loss_iou": 0.2294921875, "loss_num": 0.034912109375, "loss_xval": 0.6328125, "num_input_tokens_seen": 72742260, "step": 1161 }, { "epoch": 3.8668885191347755, "grad_norm": 15.199066162109375, "learning_rate": 5e-06, "loss": 0.6762, "num_input_tokens_seen": 72803396, "step": 1162 }, { "epoch": 3.8668885191347755, "loss": 0.8257670998573303, "loss_ce": 8.353222074219957e-05, "loss_iou": 0.279296875, "loss_num": 0.0537109375, "loss_xval": 0.82421875, "num_input_tokens_seen": 72803396, "step": 1162 }, { "epoch": 3.870216306156406, "grad_norm": 27.860286712646484, "learning_rate": 5e-06, "loss": 0.7779, "num_input_tokens_seen": 72865844, "step": 1163 }, { "epoch": 3.870216306156406, "loss": 0.9027190208435059, "loss_ce": 0.0006194374873302877, "loss_iou": 0.275390625, "loss_num": 0.07080078125, "loss_xval": 0.90234375, "num_input_tokens_seen": 72865844, "step": 1163 }, { "epoch": 3.8735440931780367, "grad_norm": 22.976346969604492, "learning_rate": 5e-06, "loss": 0.8613, "num_input_tokens_seen": 72926168, "step": 1164 }, { "epoch": 3.8735440931780367, "loss": 1.036806583404541, "loss_ce": 0.0006737524527125061, "loss_iou": 0.380859375, "loss_num": 0.054931640625, "loss_xval": 1.0390625, "num_input_tokens_seen": 72926168, "step": 1164 }, { "epoch": 3.8768718801996673, "grad_norm": 8.25428581237793, "learning_rate": 5e-06, "loss": 0.7253, "num_input_tokens_seen": 72987380, "step": 1165 }, { "epoch": 3.8768718801996673, "loss": 0.6392136216163635, "loss_ce": 5.344775854609907e-05, "loss_iou": 0.2216796875, "loss_num": 0.0390625, "loss_xval": 0.640625, "num_input_tokens_seen": 72987380, "step": 1165 }, { "epoch": 3.880199667221298, "grad_norm": 16.900022506713867, "learning_rate": 5e-06, "loss": 0.7989, "num_input_tokens_seen": 73051392, "step": 1166 }, { "epoch": 3.880199667221298, "loss": 0.7662708759307861, "loss_ce": 0.0011341598583385348, "loss_iou": 0.26953125, "loss_num": 0.044921875, "loss_xval": 0.765625, "num_input_tokens_seen": 73051392, "step": 1166 }, { "epoch": 3.8835274542429286, "grad_norm": 14.377167701721191, "learning_rate": 5e-06, "loss": 0.763, "num_input_tokens_seen": 73113100, "step": 1167 }, { "epoch": 3.8835274542429286, "loss": 0.5086641311645508, "loss_ce": 0.00024124824267346412, "loss_iou": 0.185546875, "loss_num": 0.0274658203125, "loss_xval": 0.5078125, "num_input_tokens_seen": 73113100, "step": 1167 }, { "epoch": 3.886855241264559, "grad_norm": 9.697162628173828, "learning_rate": 5e-06, "loss": 0.8776, "num_input_tokens_seen": 73176080, "step": 1168 }, { "epoch": 3.886855241264559, "loss": 0.7365698218345642, "loss_ce": 0.0007299709832295775, "loss_iou": 0.26171875, "loss_num": 0.04296875, "loss_xval": 0.734375, "num_input_tokens_seen": 73176080, "step": 1168 }, { "epoch": 3.89018302828619, "grad_norm": 7.753446578979492, "learning_rate": 5e-06, "loss": 0.6761, "num_input_tokens_seen": 73239540, "step": 1169 }, { "epoch": 3.89018302828619, "loss": 0.8952956199645996, "loss_ce": 0.0007643617573194206, "loss_iou": 0.32421875, "loss_num": 0.049072265625, "loss_xval": 0.89453125, "num_input_tokens_seen": 73239540, "step": 1169 }, { "epoch": 3.8935108153078204, "grad_norm": 38.60930633544922, "learning_rate": 5e-06, "loss": 0.9386, "num_input_tokens_seen": 73301948, "step": 1170 }, { "epoch": 3.8935108153078204, "loss": 0.8330338001251221, "loss_ce": 0.0005142604932188988, "loss_iou": 0.26171875, "loss_num": 0.0615234375, "loss_xval": 0.83203125, "num_input_tokens_seen": 73301948, "step": 1170 }, { "epoch": 3.896838602329451, "grad_norm": 44.411895751953125, "learning_rate": 5e-06, "loss": 0.877, "num_input_tokens_seen": 73364536, "step": 1171 }, { "epoch": 3.896838602329451, "loss": 0.9410591125488281, "loss_ce": 0.0005683889612555504, "loss_iou": 0.3046875, "loss_num": 0.06591796875, "loss_xval": 0.94140625, "num_input_tokens_seen": 73364536, "step": 1171 }, { "epoch": 3.9001663893510816, "grad_norm": 15.136682510375977, "learning_rate": 5e-06, "loss": 0.5564, "num_input_tokens_seen": 73426144, "step": 1172 }, { "epoch": 3.9001663893510816, "loss": 0.4998212456703186, "loss_ce": 0.00012645090464502573, "loss_iou": 0.166015625, "loss_num": 0.033447265625, "loss_xval": 0.5, "num_input_tokens_seen": 73426144, "step": 1172 }, { "epoch": 3.9034941763727122, "grad_norm": 31.158117294311523, "learning_rate": 5e-06, "loss": 0.6874, "num_input_tokens_seen": 73488980, "step": 1173 }, { "epoch": 3.9034941763727122, "loss": 0.6511569023132324, "loss_ce": 3.384947558515705e-05, "loss_iou": 0.26171875, "loss_num": 0.0255126953125, "loss_xval": 0.65234375, "num_input_tokens_seen": 73488980, "step": 1173 }, { "epoch": 3.906821963394343, "grad_norm": 18.398252487182617, "learning_rate": 5e-06, "loss": 0.7025, "num_input_tokens_seen": 73550568, "step": 1174 }, { "epoch": 3.906821963394343, "loss": 0.8032513856887817, "loss_ce": 0.0012493999674916267, "loss_iou": 0.2578125, "loss_num": 0.05712890625, "loss_xval": 0.80078125, "num_input_tokens_seen": 73550568, "step": 1174 }, { "epoch": 3.9101497504159735, "grad_norm": 14.897561073303223, "learning_rate": 5e-06, "loss": 0.6192, "num_input_tokens_seen": 73614048, "step": 1175 }, { "epoch": 3.9101497504159735, "loss": 0.6096196174621582, "loss_ce": 0.000488759484142065, "loss_iou": 0.1767578125, "loss_num": 0.051025390625, "loss_xval": 0.609375, "num_input_tokens_seen": 73614048, "step": 1175 }, { "epoch": 3.913477537437604, "grad_norm": 12.411521911621094, "learning_rate": 5e-06, "loss": 0.7318, "num_input_tokens_seen": 73677156, "step": 1176 }, { "epoch": 3.913477537437604, "loss": 0.7434451580047607, "loss_ce": 0.0008914528880268335, "loss_iou": 0.265625, "loss_num": 0.04248046875, "loss_xval": 0.7421875, "num_input_tokens_seen": 73677156, "step": 1176 }, { "epoch": 3.9168053244592347, "grad_norm": 12.991275787353516, "learning_rate": 5e-06, "loss": 0.7032, "num_input_tokens_seen": 73739192, "step": 1177 }, { "epoch": 3.9168053244592347, "loss": 0.7536220550537109, "loss_ce": 0.00020409880380611867, "loss_iou": 0.287109375, "loss_num": 0.035400390625, "loss_xval": 0.75390625, "num_input_tokens_seen": 73739192, "step": 1177 }, { "epoch": 3.9201331114808653, "grad_norm": 12.387129783630371, "learning_rate": 5e-06, "loss": 0.4625, "num_input_tokens_seen": 73801340, "step": 1178 }, { "epoch": 3.9201331114808653, "loss": 0.4774191081523895, "loss_ce": 0.0004903915687464178, "loss_iou": 0.10791015625, "loss_num": 0.05224609375, "loss_xval": 0.4765625, "num_input_tokens_seen": 73801340, "step": 1178 }, { "epoch": 3.923460898502496, "grad_norm": 13.816216468811035, "learning_rate": 5e-06, "loss": 0.5731, "num_input_tokens_seen": 73863168, "step": 1179 }, { "epoch": 3.923460898502496, "loss": 0.4799365699291229, "loss_ce": 1.7145994206657633e-05, "loss_iou": 0.177734375, "loss_num": 0.02490234375, "loss_xval": 0.48046875, "num_input_tokens_seen": 73863168, "step": 1179 }, { "epoch": 3.9267886855241265, "grad_norm": 13.6268310546875, "learning_rate": 5e-06, "loss": 0.5817, "num_input_tokens_seen": 73925652, "step": 1180 }, { "epoch": 3.9267886855241265, "loss": 0.46266400814056396, "loss_ce": 1.7535534425405785e-05, "loss_iou": 0.1494140625, "loss_num": 0.03271484375, "loss_xval": 0.462890625, "num_input_tokens_seen": 73925652, "step": 1180 }, { "epoch": 3.930116472545757, "grad_norm": 10.935044288635254, "learning_rate": 5e-06, "loss": 0.7337, "num_input_tokens_seen": 73988132, "step": 1181 }, { "epoch": 3.930116472545757, "loss": 0.714240312576294, "loss_ce": 0.000251059333095327, "loss_iou": 0.201171875, "loss_num": 0.0625, "loss_xval": 0.71484375, "num_input_tokens_seen": 73988132, "step": 1181 }, { "epoch": 3.9334442595673877, "grad_norm": 21.19894790649414, "learning_rate": 5e-06, "loss": 0.7096, "num_input_tokens_seen": 74051356, "step": 1182 }, { "epoch": 3.9334442595673877, "loss": 0.7160288691520691, "loss_ce": 0.00020855304319411516, "loss_iou": 0.220703125, "loss_num": 0.054931640625, "loss_xval": 0.71484375, "num_input_tokens_seen": 74051356, "step": 1182 }, { "epoch": 3.9367720465890184, "grad_norm": 22.308738708496094, "learning_rate": 5e-06, "loss": 1.0778, "num_input_tokens_seen": 74115356, "step": 1183 }, { "epoch": 3.9367720465890184, "loss": 1.0566372871398926, "loss_ce": 0.0003629096318036318, "loss_iou": 0.349609375, "loss_num": 0.0712890625, "loss_xval": 1.0546875, "num_input_tokens_seen": 74115356, "step": 1183 }, { "epoch": 3.940099833610649, "grad_norm": 12.43722152709961, "learning_rate": 5e-06, "loss": 0.7557, "num_input_tokens_seen": 74178668, "step": 1184 }, { "epoch": 3.940099833610649, "loss": 0.5400795936584473, "loss_ce": 4.0542239730712026e-05, "loss_iou": 0.2021484375, "loss_num": 0.027099609375, "loss_xval": 0.5390625, "num_input_tokens_seen": 74178668, "step": 1184 }, { "epoch": 3.9434276206322796, "grad_norm": 12.8514404296875, "learning_rate": 5e-06, "loss": 0.706, "num_input_tokens_seen": 74240964, "step": 1185 }, { "epoch": 3.9434276206322796, "loss": 0.8476996421813965, "loss_ce": 0.0007758617866784334, "loss_iou": 0.2578125, "loss_num": 0.06591796875, "loss_xval": 0.84765625, "num_input_tokens_seen": 74240964, "step": 1185 }, { "epoch": 3.94675540765391, "grad_norm": 17.675451278686523, "learning_rate": 5e-06, "loss": 0.8197, "num_input_tokens_seen": 74304024, "step": 1186 }, { "epoch": 3.94675540765391, "loss": 0.9300236701965332, "loss_ce": 9.207585389958695e-05, "loss_iou": 0.337890625, "loss_num": 0.05078125, "loss_xval": 0.9296875, "num_input_tokens_seen": 74304024, "step": 1186 }, { "epoch": 3.950083194675541, "grad_norm": 8.358492851257324, "learning_rate": 5e-06, "loss": 0.7479, "num_input_tokens_seen": 74367408, "step": 1187 }, { "epoch": 3.950083194675541, "loss": 0.6704340577125549, "loss_ce": 0.0016108033014461398, "loss_iou": 0.220703125, "loss_num": 0.045166015625, "loss_xval": 0.66796875, "num_input_tokens_seen": 74367408, "step": 1187 }, { "epoch": 3.9534109816971714, "grad_norm": 10.79404067993164, "learning_rate": 5e-06, "loss": 0.7976, "num_input_tokens_seen": 74430972, "step": 1188 }, { "epoch": 3.9534109816971714, "loss": 0.8109077215194702, "loss_ce": 0.0008491338812746108, "loss_iou": 0.3046875, "loss_num": 0.040283203125, "loss_xval": 0.80859375, "num_input_tokens_seen": 74430972, "step": 1188 }, { "epoch": 3.956738768718802, "grad_norm": 10.02695083618164, "learning_rate": 5e-06, "loss": 0.6818, "num_input_tokens_seen": 74493320, "step": 1189 }, { "epoch": 3.956738768718802, "loss": 0.6926209330558777, "loss_ce": 0.0002991709334310144, "loss_iou": 0.23828125, "loss_num": 0.043212890625, "loss_xval": 0.69140625, "num_input_tokens_seen": 74493320, "step": 1189 }, { "epoch": 3.9600665557404326, "grad_norm": 15.090518951416016, "learning_rate": 5e-06, "loss": 0.6759, "num_input_tokens_seen": 74556176, "step": 1190 }, { "epoch": 3.9600665557404326, "loss": 0.37127041816711426, "loss_ce": 0.0005428859149105847, "loss_iou": 0.10888671875, "loss_num": 0.0303955078125, "loss_xval": 0.37109375, "num_input_tokens_seen": 74556176, "step": 1190 }, { "epoch": 3.9633943427620633, "grad_norm": 15.567995071411133, "learning_rate": 5e-06, "loss": 0.8636, "num_input_tokens_seen": 74619904, "step": 1191 }, { "epoch": 3.9633943427620633, "loss": 1.1187453269958496, "loss_ce": 0.001313702785409987, "loss_iou": 0.421875, "loss_num": 0.05517578125, "loss_xval": 1.1171875, "num_input_tokens_seen": 74619904, "step": 1191 }, { "epoch": 3.966722129783694, "grad_norm": 20.189136505126953, "learning_rate": 5e-06, "loss": 0.899, "num_input_tokens_seen": 74683056, "step": 1192 }, { "epoch": 3.966722129783694, "loss": 0.6560426950454712, "loss_ce": 0.002234060550108552, "loss_iou": 0.1826171875, "loss_num": 0.057373046875, "loss_xval": 0.65234375, "num_input_tokens_seen": 74683056, "step": 1192 }, { "epoch": 3.9700499168053245, "grad_norm": 15.30156135559082, "learning_rate": 5e-06, "loss": 0.5481, "num_input_tokens_seen": 74744020, "step": 1193 }, { "epoch": 3.9700499168053245, "loss": 0.45218271017074585, "loss_ce": 3.4305761801078916e-05, "loss_iou": 0.134765625, "loss_num": 0.03662109375, "loss_xval": 0.453125, "num_input_tokens_seen": 74744020, "step": 1193 }, { "epoch": 3.973377703826955, "grad_norm": 18.261381149291992, "learning_rate": 5e-06, "loss": 0.6689, "num_input_tokens_seen": 74806148, "step": 1194 }, { "epoch": 3.973377703826955, "loss": 0.6880115866661072, "loss_ce": 2.3293252525036223e-05, "loss_iou": 0.2060546875, "loss_num": 0.05517578125, "loss_xval": 0.6875, "num_input_tokens_seen": 74806148, "step": 1194 }, { "epoch": 3.9767054908485857, "grad_norm": 35.95396423339844, "learning_rate": 5e-06, "loss": 0.9737, "num_input_tokens_seen": 74870276, "step": 1195 }, { "epoch": 3.9767054908485857, "loss": 1.2164342403411865, "loss_ce": 0.003543566446751356, "loss_iou": 0.408203125, "loss_num": 0.07958984375, "loss_xval": 1.2109375, "num_input_tokens_seen": 74870276, "step": 1195 }, { "epoch": 3.9800332778702163, "grad_norm": 15.76313304901123, "learning_rate": 5e-06, "loss": 0.7284, "num_input_tokens_seen": 74933660, "step": 1196 }, { "epoch": 3.9800332778702163, "loss": 0.8802682161331177, "loss_ce": 0.0006294772610999644, "loss_iou": 0.2578125, "loss_num": 0.07275390625, "loss_xval": 0.87890625, "num_input_tokens_seen": 74933660, "step": 1196 }, { "epoch": 3.983361064891847, "grad_norm": 8.434407234191895, "learning_rate": 5e-06, "loss": 0.6092, "num_input_tokens_seen": 74997052, "step": 1197 }, { "epoch": 3.983361064891847, "loss": 0.511523962020874, "loss_ce": 0.0004155955102760345, "loss_iou": 0.1650390625, "loss_num": 0.0361328125, "loss_xval": 0.51171875, "num_input_tokens_seen": 74997052, "step": 1197 }, { "epoch": 3.9866888519134775, "grad_norm": 10.437862396240234, "learning_rate": 5e-06, "loss": 0.7356, "num_input_tokens_seen": 75058944, "step": 1198 }, { "epoch": 3.9866888519134775, "loss": 0.9156403541564941, "loss_ce": 0.0020660818554461002, "loss_iou": 0.294921875, "loss_num": 0.06396484375, "loss_xval": 0.9140625, "num_input_tokens_seen": 75058944, "step": 1198 }, { "epoch": 3.990016638935108, "grad_norm": 22.736345291137695, "learning_rate": 5e-06, "loss": 0.601, "num_input_tokens_seen": 75120440, "step": 1199 }, { "epoch": 3.990016638935108, "loss": 0.6733030080795288, "loss_ce": 0.0006955533754080534, "loss_iou": 0.2314453125, "loss_num": 0.0419921875, "loss_xval": 0.671875, "num_input_tokens_seen": 75120440, "step": 1199 }, { "epoch": 3.9933444259567388, "grad_norm": 19.93775177001953, "learning_rate": 5e-06, "loss": 0.5766, "num_input_tokens_seen": 75180068, "step": 1200 }, { "epoch": 3.9933444259567388, "loss": 0.502943754196167, "loss_ce": 1.4059570276003797e-05, "loss_iou": 0.1240234375, "loss_num": 0.051025390625, "loss_xval": 0.50390625, "num_input_tokens_seen": 75180068, "step": 1200 }, { "epoch": 3.9966722129783694, "grad_norm": 7.942922592163086, "learning_rate": 5e-06, "loss": 0.6018, "num_input_tokens_seen": 75242588, "step": 1201 }, { "epoch": 3.9966722129783694, "loss": 0.5372599363327026, "loss_ce": 0.0003947424702346325, "loss_iou": 0.2041015625, "loss_num": 0.0257568359375, "loss_xval": 0.53515625, "num_input_tokens_seen": 75242588, "step": 1201 }, { "epoch": 4.0, "grad_norm": 15.248062133789062, "learning_rate": 5e-06, "loss": 0.6613, "num_input_tokens_seen": 75304732, "step": 1202 }, { "epoch": 4.0, "loss": 0.7052999138832092, "loss_ce": 0.000648995628580451, "loss_iou": 0.177734375, "loss_num": 0.06982421875, "loss_xval": 0.703125, "num_input_tokens_seen": 75304732, "step": 1202 }, { "epoch": 4.003327787021631, "grad_norm": 11.188032150268555, "learning_rate": 5e-06, "loss": 0.7275, "num_input_tokens_seen": 75366540, "step": 1203 }, { "epoch": 4.003327787021631, "loss": 0.7967070937156677, "loss_ce": 0.0008086481248028576, "loss_iou": 0.298828125, "loss_num": 0.039794921875, "loss_xval": 0.796875, "num_input_tokens_seen": 75366540, "step": 1203 }, { "epoch": 4.006655574043261, "grad_norm": 5.265565872192383, "learning_rate": 5e-06, "loss": 0.3553, "num_input_tokens_seen": 75427768, "step": 1204 }, { "epoch": 4.006655574043261, "loss": 0.2486070692539215, "loss_ce": 1.0882431524805725e-05, "loss_iou": 0.0, "loss_num": 0.0498046875, "loss_xval": 0.2490234375, "num_input_tokens_seen": 75427768, "step": 1204 }, { "epoch": 4.009983361064892, "grad_norm": 73.91722106933594, "learning_rate": 5e-06, "loss": 0.7568, "num_input_tokens_seen": 75491840, "step": 1205 }, { "epoch": 4.009983361064892, "loss": 0.8017033338546753, "loss_ce": 0.000922084494959563, "loss_iou": 0.279296875, "loss_num": 0.04833984375, "loss_xval": 0.80078125, "num_input_tokens_seen": 75491840, "step": 1205 }, { "epoch": 4.0133111480865225, "grad_norm": 19.91536521911621, "learning_rate": 5e-06, "loss": 0.5606, "num_input_tokens_seen": 75553160, "step": 1206 }, { "epoch": 4.0133111480865225, "loss": 0.5015987157821655, "loss_ce": 0.00013387855142354965, "loss_iou": 0.1376953125, "loss_num": 0.045166015625, "loss_xval": 0.5, "num_input_tokens_seen": 75553160, "step": 1206 }, { "epoch": 4.016638935108153, "grad_norm": 12.133269309997559, "learning_rate": 5e-06, "loss": 0.8538, "num_input_tokens_seen": 75618412, "step": 1207 }, { "epoch": 4.016638935108153, "loss": 0.9982300996780396, "loss_ce": 0.0014039267553016543, "loss_iou": 0.365234375, "loss_num": 0.0537109375, "loss_xval": 0.99609375, "num_input_tokens_seen": 75618412, "step": 1207 }, { "epoch": 4.019966722129784, "grad_norm": 8.875154495239258, "learning_rate": 5e-06, "loss": 0.775, "num_input_tokens_seen": 75682372, "step": 1208 }, { "epoch": 4.019966722129784, "loss": 0.8194925785064697, "loss_ce": 0.0001566612918395549, "loss_iou": 0.2890625, "loss_num": 0.04833984375, "loss_xval": 0.8203125, "num_input_tokens_seen": 75682372, "step": 1208 }, { "epoch": 4.023294509151414, "grad_norm": 10.672651290893555, "learning_rate": 5e-06, "loss": 0.4433, "num_input_tokens_seen": 75742552, "step": 1209 }, { "epoch": 4.023294509151414, "loss": 0.3726426362991333, "loss_ce": 2.2998472559265792e-05, "loss_iou": 0.05029296875, "loss_num": 0.054443359375, "loss_xval": 0.373046875, "num_input_tokens_seen": 75742552, "step": 1209 }, { "epoch": 4.026622296173045, "grad_norm": 31.792285919189453, "learning_rate": 5e-06, "loss": 0.7017, "num_input_tokens_seen": 75804336, "step": 1210 }, { "epoch": 4.026622296173045, "loss": 0.7170919179916382, "loss_ce": 0.0002950564958155155, "loss_iou": 0.2451171875, "loss_num": 0.045166015625, "loss_xval": 0.71875, "num_input_tokens_seen": 75804336, "step": 1210 }, { "epoch": 4.0299500831946755, "grad_norm": 14.573902130126953, "learning_rate": 5e-06, "loss": 0.6705, "num_input_tokens_seen": 75865872, "step": 1211 }, { "epoch": 4.0299500831946755, "loss": 0.7652605175971985, "loss_ce": 0.00030688103288412094, "loss_iou": 0.2314453125, "loss_num": 0.060302734375, "loss_xval": 0.765625, "num_input_tokens_seen": 75865872, "step": 1211 }, { "epoch": 4.033277870216306, "grad_norm": 17.535743713378906, "learning_rate": 5e-06, "loss": 0.6471, "num_input_tokens_seen": 75930268, "step": 1212 }, { "epoch": 4.033277870216306, "loss": 0.7741683721542358, "loss_ce": 0.0007308665663003922, "loss_iou": 0.2890625, "loss_num": 0.039306640625, "loss_xval": 0.7734375, "num_input_tokens_seen": 75930268, "step": 1212 }, { "epoch": 4.036605657237937, "grad_norm": 14.236082077026367, "learning_rate": 5e-06, "loss": 0.9975, "num_input_tokens_seen": 75994488, "step": 1213 }, { "epoch": 4.036605657237937, "loss": 1.1918745040893555, "loss_ce": 0.0007124332478269935, "loss_iou": 0.404296875, "loss_num": 0.07666015625, "loss_xval": 1.1875, "num_input_tokens_seen": 75994488, "step": 1213 }, { "epoch": 4.039933444259567, "grad_norm": 12.264684677124023, "learning_rate": 5e-06, "loss": 0.8542, "num_input_tokens_seen": 76057384, "step": 1214 }, { "epoch": 4.039933444259567, "loss": 1.0039153099060059, "loss_ce": 0.0009246146655641496, "loss_iou": 0.345703125, "loss_num": 0.062255859375, "loss_xval": 1.0, "num_input_tokens_seen": 76057384, "step": 1214 }, { "epoch": 4.043261231281198, "grad_norm": 11.338297843933105, "learning_rate": 5e-06, "loss": 0.6555, "num_input_tokens_seen": 76120200, "step": 1215 }, { "epoch": 4.043261231281198, "loss": 0.6709122061729431, "loss_ce": 1.3811413737130351e-05, "loss_iou": 0.2392578125, "loss_num": 0.038330078125, "loss_xval": 0.671875, "num_input_tokens_seen": 76120200, "step": 1215 }, { "epoch": 4.046589018302829, "grad_norm": 11.017167091369629, "learning_rate": 5e-06, "loss": 0.759, "num_input_tokens_seen": 76184652, "step": 1216 }, { "epoch": 4.046589018302829, "loss": 0.8067546486854553, "loss_ce": 0.0002360953949391842, "loss_iou": 0.294921875, "loss_num": 0.04296875, "loss_xval": 0.8046875, "num_input_tokens_seen": 76184652, "step": 1216 }, { "epoch": 4.049916805324459, "grad_norm": 9.538055419921875, "learning_rate": 5e-06, "loss": 0.659, "num_input_tokens_seen": 76248504, "step": 1217 }, { "epoch": 4.049916805324459, "loss": 0.6807469129562378, "loss_ce": 0.00020490906899794936, "loss_iou": 0.265625, "loss_num": 0.0299072265625, "loss_xval": 0.6796875, "num_input_tokens_seen": 76248504, "step": 1217 }, { "epoch": 4.05324459234609, "grad_norm": 13.316681861877441, "learning_rate": 5e-06, "loss": 0.7569, "num_input_tokens_seen": 76312336, "step": 1218 }, { "epoch": 4.05324459234609, "loss": 0.6233140230178833, "loss_ce": 0.00014506097068078816, "loss_iou": 0.2275390625, "loss_num": 0.033447265625, "loss_xval": 0.625, "num_input_tokens_seen": 76312336, "step": 1218 }, { "epoch": 4.05657237936772, "grad_norm": 18.77733039855957, "learning_rate": 5e-06, "loss": 0.7825, "num_input_tokens_seen": 76377484, "step": 1219 }, { "epoch": 4.05657237936772, "loss": 0.8869447708129883, "loss_ce": 0.001202607061713934, "loss_iou": 0.33984375, "loss_num": 0.041015625, "loss_xval": 0.88671875, "num_input_tokens_seen": 76377484, "step": 1219 }, { "epoch": 4.059900166389351, "grad_norm": 13.015058517456055, "learning_rate": 5e-06, "loss": 0.8573, "num_input_tokens_seen": 76439780, "step": 1220 }, { "epoch": 4.059900166389351, "loss": 0.7998440265655518, "loss_ce": 0.0010159012163057923, "loss_iou": 0.291015625, "loss_num": 0.04345703125, "loss_xval": 0.796875, "num_input_tokens_seen": 76439780, "step": 1220 }, { "epoch": 4.063227953410982, "grad_norm": 10.296722412109375, "learning_rate": 5e-06, "loss": 1.0222, "num_input_tokens_seen": 76500992, "step": 1221 }, { "epoch": 4.063227953410982, "loss": 0.8199893236160278, "loss_ce": 0.0005923279095441103, "loss_iou": 0.25390625, "loss_num": 0.062255859375, "loss_xval": 0.8203125, "num_input_tokens_seen": 76500992, "step": 1221 }, { "epoch": 4.066555740432612, "grad_norm": 16.238292694091797, "learning_rate": 5e-06, "loss": 0.971, "num_input_tokens_seen": 76564108, "step": 1222 }, { "epoch": 4.066555740432612, "loss": 1.0141180753707886, "loss_ce": 0.0008124337764456868, "loss_iou": 0.3984375, "loss_num": 0.04345703125, "loss_xval": 1.015625, "num_input_tokens_seen": 76564108, "step": 1222 }, { "epoch": 4.069883527454243, "grad_norm": 15.772029876708984, "learning_rate": 5e-06, "loss": 0.804, "num_input_tokens_seen": 76627116, "step": 1223 }, { "epoch": 4.069883527454243, "loss": 0.8473187685012817, "loss_ce": 2.8751601348631084e-05, "loss_iou": 0.31640625, "loss_num": 0.042724609375, "loss_xval": 0.84765625, "num_input_tokens_seen": 76627116, "step": 1223 }, { "epoch": 4.0732113144758735, "grad_norm": 21.51706314086914, "learning_rate": 5e-06, "loss": 0.757, "num_input_tokens_seen": 76688968, "step": 1224 }, { "epoch": 4.0732113144758735, "loss": 0.6889785528182983, "loss_ce": 1.3671827218786348e-05, "loss_iou": 0.2578125, "loss_num": 0.03466796875, "loss_xval": 0.6875, "num_input_tokens_seen": 76688968, "step": 1224 }, { "epoch": 4.076539101497504, "grad_norm": 13.671257019042969, "learning_rate": 5e-06, "loss": 0.6394, "num_input_tokens_seen": 76752252, "step": 1225 }, { "epoch": 4.076539101497504, "loss": 0.6889891624450684, "loss_ce": 0.00026844226522371173, "loss_iou": 0.275390625, "loss_num": 0.02734375, "loss_xval": 0.6875, "num_input_tokens_seen": 76752252, "step": 1225 }, { "epoch": 4.079866888519135, "grad_norm": 12.221083641052246, "learning_rate": 5e-06, "loss": 0.5801, "num_input_tokens_seen": 76814152, "step": 1226 }, { "epoch": 4.079866888519135, "loss": 0.5570855736732483, "loss_ce": 0.0003839263808913529, "loss_iou": 0.19921875, "loss_num": 0.03173828125, "loss_xval": 0.55859375, "num_input_tokens_seen": 76814152, "step": 1226 }, { "epoch": 4.083194675540765, "grad_norm": 20.396621704101562, "learning_rate": 5e-06, "loss": 0.773, "num_input_tokens_seen": 76877564, "step": 1227 }, { "epoch": 4.083194675540765, "loss": 0.6958671808242798, "loss_ce": 6.636592297581956e-05, "loss_iou": 0.234375, "loss_num": 0.044921875, "loss_xval": 0.6953125, "num_input_tokens_seen": 76877564, "step": 1227 }, { "epoch": 4.086522462562396, "grad_norm": 21.930482864379883, "learning_rate": 5e-06, "loss": 0.9511, "num_input_tokens_seen": 76943260, "step": 1228 }, { "epoch": 4.086522462562396, "loss": 1.138702630996704, "loss_ce": 0.0022280181292444468, "loss_iou": 0.44140625, "loss_num": 0.051025390625, "loss_xval": 1.1328125, "num_input_tokens_seen": 76943260, "step": 1228 }, { "epoch": 4.0898502495840265, "grad_norm": 10.00102424621582, "learning_rate": 5e-06, "loss": 0.6424, "num_input_tokens_seen": 77005432, "step": 1229 }, { "epoch": 4.0898502495840265, "loss": 0.47109830379486084, "loss_ce": 0.0005172473029233515, "loss_iou": 0.126953125, "loss_num": 0.04345703125, "loss_xval": 0.470703125, "num_input_tokens_seen": 77005432, "step": 1229 }, { "epoch": 4.093178036605657, "grad_norm": 15.279685020446777, "learning_rate": 5e-06, "loss": 0.8382, "num_input_tokens_seen": 77067552, "step": 1230 }, { "epoch": 4.093178036605657, "loss": 0.7615088820457458, "loss_ce": 3.428545824135654e-05, "loss_iou": 0.251953125, "loss_num": 0.051513671875, "loss_xval": 0.76171875, "num_input_tokens_seen": 77067552, "step": 1230 }, { "epoch": 4.096505823627288, "grad_norm": 24.48037338256836, "learning_rate": 5e-06, "loss": 0.7931, "num_input_tokens_seen": 77130584, "step": 1231 }, { "epoch": 4.096505823627288, "loss": 0.9837879538536072, "loss_ce": 2.3307909941649996e-05, "loss_iou": 0.345703125, "loss_num": 0.05810546875, "loss_xval": 0.984375, "num_input_tokens_seen": 77130584, "step": 1231 }, { "epoch": 4.099833610648918, "grad_norm": 35.03629684448242, "learning_rate": 5e-06, "loss": 1.0287, "num_input_tokens_seen": 77194704, "step": 1232 }, { "epoch": 4.099833610648918, "loss": 1.1050645112991333, "loss_ce": 0.0008164606406353414, "loss_iou": 0.365234375, "loss_num": 0.07470703125, "loss_xval": 1.1015625, "num_input_tokens_seen": 77194704, "step": 1232 }, { "epoch": 4.103161397670549, "grad_norm": 47.826690673828125, "learning_rate": 5e-06, "loss": 0.7708, "num_input_tokens_seen": 77258880, "step": 1233 }, { "epoch": 4.103161397670549, "loss": 0.8139855861663818, "loss_ce": 0.0007531545124948025, "loss_iou": 0.326171875, "loss_num": 0.031982421875, "loss_xval": 0.8125, "num_input_tokens_seen": 77258880, "step": 1233 }, { "epoch": 4.10648918469218, "grad_norm": 29.535999298095703, "learning_rate": 5e-06, "loss": 0.7673, "num_input_tokens_seen": 77321428, "step": 1234 }, { "epoch": 4.10648918469218, "loss": 0.7830252647399902, "loss_ce": 6.632165604969487e-05, "loss_iou": 0.294921875, "loss_num": 0.038818359375, "loss_xval": 0.78125, "num_input_tokens_seen": 77321428, "step": 1234 }, { "epoch": 4.10981697171381, "grad_norm": 21.408437728881836, "learning_rate": 5e-06, "loss": 0.7928, "num_input_tokens_seen": 77383604, "step": 1235 }, { "epoch": 4.10981697171381, "loss": 0.7510450482368469, "loss_ce": 0.0008008688455447555, "loss_iou": 0.259765625, "loss_num": 0.0458984375, "loss_xval": 0.75, "num_input_tokens_seen": 77383604, "step": 1235 }, { "epoch": 4.113144758735441, "grad_norm": 15.90986442565918, "learning_rate": 5e-06, "loss": 0.4682, "num_input_tokens_seen": 77446016, "step": 1236 }, { "epoch": 4.113144758735441, "loss": 0.3090360760688782, "loss_ce": 0.0005643867189064622, "loss_iou": 0.0654296875, "loss_num": 0.035400390625, "loss_xval": 0.30859375, "num_input_tokens_seen": 77446016, "step": 1236 }, { "epoch": 4.116472545757071, "grad_norm": 22.74576187133789, "learning_rate": 5e-06, "loss": 0.9686, "num_input_tokens_seen": 77511136, "step": 1237 }, { "epoch": 4.116472545757071, "loss": 1.0496145486831665, "loss_ce": 0.00029816440655849874, "loss_iou": 0.3359375, "loss_num": 0.0751953125, "loss_xval": 1.046875, "num_input_tokens_seen": 77511136, "step": 1237 }, { "epoch": 4.119800332778702, "grad_norm": 27.88201332092285, "learning_rate": 5e-06, "loss": 0.5876, "num_input_tokens_seen": 77574216, "step": 1238 }, { "epoch": 4.119800332778702, "loss": 0.6051973700523376, "loss_ce": 0.00046104774810373783, "loss_iou": 0.21875, "loss_num": 0.033447265625, "loss_xval": 0.60546875, "num_input_tokens_seen": 77574216, "step": 1238 }, { "epoch": 4.123128119800333, "grad_norm": 11.923408508300781, "learning_rate": 5e-06, "loss": 0.6994, "num_input_tokens_seen": 77636964, "step": 1239 }, { "epoch": 4.123128119800333, "loss": 0.9356486201286316, "loss_ce": 0.0003458983264863491, "loss_iou": 0.2890625, "loss_num": 0.0712890625, "loss_xval": 0.93359375, "num_input_tokens_seen": 77636964, "step": 1239 }, { "epoch": 4.126455906821963, "grad_norm": 14.89736270904541, "learning_rate": 5e-06, "loss": 0.4658, "num_input_tokens_seen": 77697900, "step": 1240 }, { "epoch": 4.126455906821963, "loss": 0.5524719953536987, "loss_ce": 0.009991498664021492, "loss_iou": 0.19140625, "loss_num": 0.031982421875, "loss_xval": 0.54296875, "num_input_tokens_seen": 77697900, "step": 1240 }, { "epoch": 4.129783693843594, "grad_norm": 7.385460376739502, "learning_rate": 5e-06, "loss": 0.7427, "num_input_tokens_seen": 77760148, "step": 1241 }, { "epoch": 4.129783693843594, "loss": 0.9304953813552856, "loss_ce": 0.0008078092359937727, "loss_iou": 0.3046875, "loss_num": 0.0634765625, "loss_xval": 0.9296875, "num_input_tokens_seen": 77760148, "step": 1241 }, { "epoch": 4.1331114808652245, "grad_norm": 11.04951286315918, "learning_rate": 5e-06, "loss": 0.6882, "num_input_tokens_seen": 77821680, "step": 1242 }, { "epoch": 4.1331114808652245, "loss": 0.5894361734390259, "loss_ce": 1.963950853678398e-05, "loss_iou": 0.19140625, "loss_num": 0.04150390625, "loss_xval": 0.58984375, "num_input_tokens_seen": 77821680, "step": 1242 }, { "epoch": 4.136439267886855, "grad_norm": 41.49568176269531, "learning_rate": 5e-06, "loss": 0.9372, "num_input_tokens_seen": 77884232, "step": 1243 }, { "epoch": 4.136439267886855, "loss": 0.7601950168609619, "loss_ce": 0.0006735285860486329, "loss_iou": 0.28125, "loss_num": 0.0390625, "loss_xval": 0.7578125, "num_input_tokens_seen": 77884232, "step": 1243 }, { "epoch": 4.139767054908486, "grad_norm": 7.810762405395508, "learning_rate": 5e-06, "loss": 0.6704, "num_input_tokens_seen": 77947048, "step": 1244 }, { "epoch": 4.139767054908486, "loss": 0.817348062992096, "loss_ce": 0.00057560222921893, "loss_iou": 0.265625, "loss_num": 0.056640625, "loss_xval": 0.81640625, "num_input_tokens_seen": 77947048, "step": 1244 }, { "epoch": 4.143094841930116, "grad_norm": 14.463872909545898, "learning_rate": 5e-06, "loss": 0.5967, "num_input_tokens_seen": 78007588, "step": 1245 }, { "epoch": 4.143094841930116, "loss": 0.4404696822166443, "loss_ce": 3.99709097109735e-05, "loss_iou": 0.1337890625, "loss_num": 0.03466796875, "loss_xval": 0.44140625, "num_input_tokens_seen": 78007588, "step": 1245 }, { "epoch": 4.146422628951747, "grad_norm": 11.963314056396484, "learning_rate": 5e-06, "loss": 0.7085, "num_input_tokens_seen": 78069416, "step": 1246 }, { "epoch": 4.146422628951747, "loss": 0.6378343105316162, "loss_ce": 1.6925867384998128e-05, "loss_iou": 0.2109375, "loss_num": 0.043212890625, "loss_xval": 0.63671875, "num_input_tokens_seen": 78069416, "step": 1246 }, { "epoch": 4.149750415973378, "grad_norm": 23.427759170532227, "learning_rate": 5e-06, "loss": 0.6977, "num_input_tokens_seen": 78132404, "step": 1247 }, { "epoch": 4.149750415973378, "loss": 0.7420227527618408, "loss_ce": 0.00038455973844975233, "loss_iou": 0.2314453125, "loss_num": 0.0556640625, "loss_xval": 0.7421875, "num_input_tokens_seen": 78132404, "step": 1247 }, { "epoch": 4.153078202995008, "grad_norm": 19.418766021728516, "learning_rate": 5e-06, "loss": 0.5784, "num_input_tokens_seen": 78194872, "step": 1248 }, { "epoch": 4.153078202995008, "loss": 0.4353361427783966, "loss_ce": 0.00021654315060004592, "loss_iou": 0.11181640625, "loss_num": 0.042236328125, "loss_xval": 0.435546875, "num_input_tokens_seen": 78194872, "step": 1248 }, { "epoch": 4.156405990016639, "grad_norm": 12.20608139038086, "learning_rate": 5e-06, "loss": 0.8433, "num_input_tokens_seen": 78258412, "step": 1249 }, { "epoch": 4.156405990016639, "loss": 0.7217040061950684, "loss_ce": 2.4295461116707884e-05, "loss_iou": 0.263671875, "loss_num": 0.038818359375, "loss_xval": 0.72265625, "num_input_tokens_seen": 78258412, "step": 1249 }, { "epoch": 4.159733777038269, "grad_norm": 12.527824401855469, "learning_rate": 5e-06, "loss": 0.7033, "num_input_tokens_seen": 78322204, "step": 1250 }, { "epoch": 4.159733777038269, "eval_seeclick_CIoU": 0.12239253148436546, "eval_seeclick_GIoU": 0.1511169895529747, "eval_seeclick_IoU": 0.21942590922117233, "eval_seeclick_MAE_all": 0.18183603137731552, "eval_seeclick_MAE_h": 0.05301396735012531, "eval_seeclick_MAE_w": 0.11510565504431725, "eval_seeclick_MAE_x_boxes": 0.26491403579711914, "eval_seeclick_MAE_y_boxes": 0.14476536214351654, "eval_seeclick_NUM_probability": 0.9971157312393188, "eval_seeclick_inside_bbox": 0.2718750014901161, "eval_seeclick_loss": 2.71364426612854, "eval_seeclick_loss_ce": 0.0916629321873188, "eval_seeclick_loss_iou": 0.8514404296875, "eval_seeclick_loss_num": 0.18135833740234375, "eval_seeclick_loss_xval": 2.6103515625, "eval_seeclick_runtime": 65.2703, "eval_seeclick_samples_per_second": 0.72, "eval_seeclick_steps_per_second": 0.031, "num_input_tokens_seen": 78322204, "step": 1250 }, { "epoch": 4.159733777038269, "eval_icons_CIoU": 0.03201650455594063, "eval_icons_GIoU": 0.14019615948200226, "eval_icons_IoU": 0.1814664751291275, "eval_icons_MAE_all": 0.1366742141544819, "eval_icons_MAE_h": 0.053603796288371086, "eval_icons_MAE_w": 0.13712040334939957, "eval_icons_MAE_x_boxes": 0.16213426738977432, "eval_icons_MAE_y_boxes": 0.042389593087136745, "eval_icons_NUM_probability": 0.9999926686286926, "eval_icons_inside_bbox": 0.3576388955116272, "eval_icons_loss": 2.41593861579895, "eval_icons_loss_ce": 1.236241587321274e-06, "eval_icons_loss_iou": 0.845458984375, "eval_icons_loss_num": 0.13992691040039062, "eval_icons_loss_xval": 2.38916015625, "eval_icons_runtime": 64.8775, "eval_icons_samples_per_second": 0.771, "eval_icons_steps_per_second": 0.031, "num_input_tokens_seen": 78322204, "step": 1250 }, { "epoch": 4.159733777038269, "eval_screenspot_CIoU": 0.05670065308610598, "eval_screenspot_GIoU": 0.12369907399018605, "eval_screenspot_IoU": 0.20270906388759613, "eval_screenspot_MAE_all": 0.20509299635887146, "eval_screenspot_MAE_h": 0.0750602458914121, "eval_screenspot_MAE_w": 0.1766246110200882, "eval_screenspot_MAE_x_boxes": 0.26625461379686993, "eval_screenspot_MAE_y_boxes": 0.14071540037790933, "eval_screenspot_NUM_probability": 0.9999165137608846, "eval_screenspot_inside_bbox": 0.3387500047683716, "eval_screenspot_loss": 2.7868411540985107, "eval_screenspot_loss_ce": 0.0001397205999940828, "eval_screenspot_loss_iou": 0.8883463541666666, "eval_screenspot_loss_num": 0.21124267578125, "eval_screenspot_loss_xval": 2.8326822916666665, "eval_screenspot_runtime": 112.2559, "eval_screenspot_samples_per_second": 0.793, "eval_screenspot_steps_per_second": 0.027, "num_input_tokens_seen": 78322204, "step": 1250 }, { "epoch": 4.159733777038269, "eval_compot_CIoU": -0.04526886157691479, "eval_compot_GIoU": 0.07369405962526798, "eval_compot_IoU": 0.12557204440236092, "eval_compot_MAE_all": 0.21759440749883652, "eval_compot_MAE_h": 0.0656399242579937, "eval_compot_MAE_w": 0.24755792319774628, "eval_compot_MAE_x_boxes": 0.19112109392881393, "eval_compot_MAE_y_boxes": 0.13230591267347336, "eval_compot_NUM_probability": 0.9999650120735168, "eval_compot_inside_bbox": 0.2395833358168602, "eval_compot_loss": 2.8879714012145996, "eval_compot_loss_ce": 0.005962656578049064, "eval_compot_loss_iou": 0.923095703125, "eval_compot_loss_num": 0.205718994140625, "eval_compot_loss_xval": 2.873046875, "eval_compot_runtime": 74.5769, "eval_compot_samples_per_second": 0.67, "eval_compot_steps_per_second": 0.027, "num_input_tokens_seen": 78322204, "step": 1250 }, { "epoch": 4.159733777038269, "eval_custom_ui_MAE_all": 0.07984927669167519, "eval_custom_ui_MAE_x": 0.07491207309067249, "eval_custom_ui_MAE_y": 0.08478647843003273, "eval_custom_ui_NUM_probability": 0.9999922215938568, "eval_custom_ui_loss": 0.38728201389312744, "eval_custom_ui_loss_ce": 1.5295036746465485e-05, "eval_custom_ui_loss_num": 0.0752716064453125, "eval_custom_ui_loss_xval": 0.37652587890625, "eval_custom_ui_runtime": 58.5282, "eval_custom_ui_samples_per_second": 0.854, "eval_custom_ui_steps_per_second": 0.034, "num_input_tokens_seen": 78322204, "step": 1250 }, { "epoch": 4.159733777038269, "loss": 0.3760015368461609, "loss_ce": 2.4953300453489646e-05, "loss_iou": 0.0, "loss_num": 0.0751953125, "loss_xval": 0.375, "num_input_tokens_seen": 78322204, "step": 1250 }, { "epoch": 4.1630615640599, "grad_norm": 8.160618782043457, "learning_rate": 5e-06, "loss": 0.5385, "num_input_tokens_seen": 78384488, "step": 1251 }, { "epoch": 4.1630615640599, "loss": 0.5602893829345703, "loss_ce": 0.0011768110562115908, "loss_iou": 0.166015625, "loss_num": 0.045166015625, "loss_xval": 0.55859375, "num_input_tokens_seen": 78384488, "step": 1251 }, { "epoch": 4.166389351081531, "grad_norm": 36.30681610107422, "learning_rate": 5e-06, "loss": 0.9031, "num_input_tokens_seen": 78447932, "step": 1252 }, { "epoch": 4.166389351081531, "loss": 0.8445026874542236, "loss_ce": 0.00026445844559930265, "loss_iou": 0.298828125, "loss_num": 0.049560546875, "loss_xval": 0.84375, "num_input_tokens_seen": 78447932, "step": 1252 }, { "epoch": 4.169717138103161, "grad_norm": 49.61948776245117, "learning_rate": 5e-06, "loss": 0.9137, "num_input_tokens_seen": 78513152, "step": 1253 }, { "epoch": 4.169717138103161, "loss": 0.8838903903961182, "loss_ce": 0.0010779636213555932, "loss_iou": 0.310546875, "loss_num": 0.052734375, "loss_xval": 0.8828125, "num_input_tokens_seen": 78513152, "step": 1253 }, { "epoch": 4.173044925124792, "grad_norm": 29.25813102722168, "learning_rate": 5e-06, "loss": 0.4711, "num_input_tokens_seen": 78574284, "step": 1254 }, { "epoch": 4.173044925124792, "loss": 0.4923175275325775, "loss_ce": 0.0004657130048144609, "loss_iou": 0.1318359375, "loss_num": 0.045654296875, "loss_xval": 0.4921875, "num_input_tokens_seen": 78574284, "step": 1254 }, { "epoch": 4.1763727121464225, "grad_norm": 20.463504791259766, "learning_rate": 5e-06, "loss": 0.6105, "num_input_tokens_seen": 78635596, "step": 1255 }, { "epoch": 4.1763727121464225, "loss": 0.667670488357544, "loss_ce": 0.0014106841990724206, "loss_iou": 0.23828125, "loss_num": 0.0380859375, "loss_xval": 0.66796875, "num_input_tokens_seen": 78635596, "step": 1255 }, { "epoch": 4.179700499168053, "grad_norm": 18.627574920654297, "learning_rate": 5e-06, "loss": 0.6598, "num_input_tokens_seen": 78697748, "step": 1256 }, { "epoch": 4.179700499168053, "loss": 0.6276933550834656, "loss_ce": 0.00021382489649113268, "loss_iou": 0.23046875, "loss_num": 0.033203125, "loss_xval": 0.62890625, "num_input_tokens_seen": 78697748, "step": 1256 }, { "epoch": 4.183028286189684, "grad_norm": 8.834410667419434, "learning_rate": 5e-06, "loss": 0.5655, "num_input_tokens_seen": 78759760, "step": 1257 }, { "epoch": 4.183028286189684, "loss": 0.6625989079475403, "loss_ce": 0.0004895288147963583, "loss_iou": 0.2275390625, "loss_num": 0.04150390625, "loss_xval": 0.6640625, "num_input_tokens_seen": 78759760, "step": 1257 }, { "epoch": 4.186356073211314, "grad_norm": 14.236786842346191, "learning_rate": 5e-06, "loss": 0.6359, "num_input_tokens_seen": 78823028, "step": 1258 }, { "epoch": 4.186356073211314, "loss": 0.6722902059555054, "loss_ce": 4.9012665840564296e-05, "loss_iou": 0.26953125, "loss_num": 0.0267333984375, "loss_xval": 0.671875, "num_input_tokens_seen": 78823028, "step": 1258 }, { "epoch": 4.189683860232945, "grad_norm": 83.77409362792969, "learning_rate": 5e-06, "loss": 0.5601, "num_input_tokens_seen": 78885504, "step": 1259 }, { "epoch": 4.189683860232945, "loss": 0.4171263575553894, "loss_ce": 0.0001341925235465169, "loss_iou": 0.146484375, "loss_num": 0.02490234375, "loss_xval": 0.41796875, "num_input_tokens_seen": 78885504, "step": 1259 }, { "epoch": 4.1930116472545755, "grad_norm": 12.761661529541016, "learning_rate": 5e-06, "loss": 0.8203, "num_input_tokens_seen": 78947504, "step": 1260 }, { "epoch": 4.1930116472545755, "loss": 0.7472410202026367, "loss_ce": 4.86068420286756e-05, "loss_iou": 0.216796875, "loss_num": 0.062255859375, "loss_xval": 0.74609375, "num_input_tokens_seen": 78947504, "step": 1260 }, { "epoch": 4.196339434276206, "grad_norm": 12.434175491333008, "learning_rate": 5e-06, "loss": 0.6015, "num_input_tokens_seen": 79006968, "step": 1261 }, { "epoch": 4.196339434276206, "loss": 0.5894126296043396, "loss_ce": 0.0007895919261500239, "loss_iou": 0.173828125, "loss_num": 0.048095703125, "loss_xval": 0.58984375, "num_input_tokens_seen": 79006968, "step": 1261 }, { "epoch": 4.199667221297837, "grad_norm": 10.954483985900879, "learning_rate": 5e-06, "loss": 0.9019, "num_input_tokens_seen": 79068724, "step": 1262 }, { "epoch": 4.199667221297837, "loss": 0.8818979859352112, "loss_ce": 0.0005503545398823917, "loss_iou": 0.31640625, "loss_num": 0.0498046875, "loss_xval": 0.8828125, "num_input_tokens_seen": 79068724, "step": 1262 }, { "epoch": 4.202995008319467, "grad_norm": 10.366689682006836, "learning_rate": 5e-06, "loss": 0.5979, "num_input_tokens_seen": 79127728, "step": 1263 }, { "epoch": 4.202995008319467, "loss": 0.7296203970909119, "loss_ce": 6.130624115030514e-06, "loss_iou": 0.224609375, "loss_num": 0.055908203125, "loss_xval": 0.73046875, "num_input_tokens_seen": 79127728, "step": 1263 }, { "epoch": 4.206322795341098, "grad_norm": 9.267184257507324, "learning_rate": 5e-06, "loss": 0.7216, "num_input_tokens_seen": 79190896, "step": 1264 }, { "epoch": 4.206322795341098, "loss": 0.5524642467498779, "loss_ce": 0.00021812476916238666, "loss_iou": 0.1728515625, "loss_num": 0.041015625, "loss_xval": 0.55078125, "num_input_tokens_seen": 79190896, "step": 1264 }, { "epoch": 4.209650582362729, "grad_norm": 9.680049896240234, "learning_rate": 5e-06, "loss": 0.7248, "num_input_tokens_seen": 79253700, "step": 1265 }, { "epoch": 4.209650582362729, "loss": 0.6744685173034668, "loss_ce": 0.00015209820412565023, "loss_iou": 0.263671875, "loss_num": 0.0291748046875, "loss_xval": 0.67578125, "num_input_tokens_seen": 79253700, "step": 1265 }, { "epoch": 4.212978369384359, "grad_norm": 13.558832168579102, "learning_rate": 5e-06, "loss": 0.8204, "num_input_tokens_seen": 79316324, "step": 1266 }, { "epoch": 4.212978369384359, "loss": 0.8318663835525513, "loss_ce": 0.00020139635307714343, "loss_iou": 0.294921875, "loss_num": 0.048583984375, "loss_xval": 0.83203125, "num_input_tokens_seen": 79316324, "step": 1266 }, { "epoch": 4.21630615640599, "grad_norm": 34.612335205078125, "learning_rate": 5e-06, "loss": 0.6904, "num_input_tokens_seen": 79379112, "step": 1267 }, { "epoch": 4.21630615640599, "loss": 0.7487878799438477, "loss_ce": 8.608042662672233e-06, "loss_iou": 0.2451171875, "loss_num": 0.0517578125, "loss_xval": 0.75, "num_input_tokens_seen": 79379112, "step": 1267 }, { "epoch": 4.21963394342762, "grad_norm": 13.187520027160645, "learning_rate": 5e-06, "loss": 0.7209, "num_input_tokens_seen": 79441800, "step": 1268 }, { "epoch": 4.21963394342762, "loss": 0.6884113550186157, "loss_ce": 0.0010334014659747481, "loss_iou": 0.23828125, "loss_num": 0.042236328125, "loss_xval": 0.6875, "num_input_tokens_seen": 79441800, "step": 1268 }, { "epoch": 4.222961730449251, "grad_norm": 29.02869415283203, "learning_rate": 5e-06, "loss": 0.7215, "num_input_tokens_seen": 79503748, "step": 1269 }, { "epoch": 4.222961730449251, "loss": 0.7619832754135132, "loss_ce": 2.0363593648653477e-05, "loss_iou": 0.287109375, "loss_num": 0.03759765625, "loss_xval": 0.76171875, "num_input_tokens_seen": 79503748, "step": 1269 }, { "epoch": 4.226289517470882, "grad_norm": 56.70915603637695, "learning_rate": 5e-06, "loss": 0.6429, "num_input_tokens_seen": 79564680, "step": 1270 }, { "epoch": 4.226289517470882, "loss": 0.7590841054916382, "loss_ce": 5.0929971621371806e-05, "loss_iou": 0.1982421875, "loss_num": 0.07275390625, "loss_xval": 0.7578125, "num_input_tokens_seen": 79564680, "step": 1270 }, { "epoch": 4.229617304492512, "grad_norm": 16.210630416870117, "learning_rate": 5e-06, "loss": 0.7066, "num_input_tokens_seen": 79626448, "step": 1271 }, { "epoch": 4.229617304492512, "loss": 0.6228231191635132, "loss_ce": 0.001607277779839933, "loss_iou": 0.20703125, "loss_num": 0.04150390625, "loss_xval": 0.62109375, "num_input_tokens_seen": 79626448, "step": 1271 }, { "epoch": 4.232945091514143, "grad_norm": 26.057252883911133, "learning_rate": 5e-06, "loss": 0.6158, "num_input_tokens_seen": 79689628, "step": 1272 }, { "epoch": 4.232945091514143, "loss": 0.6330506801605225, "loss_ce": 0.00023816811153665185, "loss_iou": 0.248046875, "loss_num": 0.0272216796875, "loss_xval": 0.6328125, "num_input_tokens_seen": 79689628, "step": 1272 }, { "epoch": 4.2362728785357735, "grad_norm": 8.330951690673828, "learning_rate": 5e-06, "loss": 0.75, "num_input_tokens_seen": 79752856, "step": 1273 }, { "epoch": 4.2362728785357735, "loss": 0.9343417286872864, "loss_ce": 1.557775613036938e-05, "loss_iou": 0.359375, "loss_num": 0.04296875, "loss_xval": 0.93359375, "num_input_tokens_seen": 79752856, "step": 1273 }, { "epoch": 4.239600665557404, "grad_norm": 28.429784774780273, "learning_rate": 5e-06, "loss": 0.7537, "num_input_tokens_seen": 79813784, "step": 1274 }, { "epoch": 4.239600665557404, "loss": 0.5956960916519165, "loss_ce": 0.0004812246188521385, "loss_iou": 0.177734375, "loss_num": 0.0478515625, "loss_xval": 0.59375, "num_input_tokens_seen": 79813784, "step": 1274 }, { "epoch": 4.242928452579035, "grad_norm": 14.092952728271484, "learning_rate": 5e-06, "loss": 0.8371, "num_input_tokens_seen": 79878556, "step": 1275 }, { "epoch": 4.242928452579035, "loss": 0.9099881052970886, "loss_ce": 7.601917604915798e-05, "loss_iou": 0.353515625, "loss_num": 0.041015625, "loss_xval": 0.91015625, "num_input_tokens_seen": 79878556, "step": 1275 }, { "epoch": 4.246256239600665, "grad_norm": 25.563735961914062, "learning_rate": 5e-06, "loss": 0.7424, "num_input_tokens_seen": 79940316, "step": 1276 }, { "epoch": 4.246256239600665, "loss": 0.7878633737564087, "loss_ce": 2.1567015210166574e-05, "loss_iou": 0.2431640625, "loss_num": 0.059814453125, "loss_xval": 0.7890625, "num_input_tokens_seen": 79940316, "step": 1276 }, { "epoch": 4.249584026622296, "grad_norm": 16.13930892944336, "learning_rate": 5e-06, "loss": 0.6181, "num_input_tokens_seen": 80002848, "step": 1277 }, { "epoch": 4.249584026622296, "loss": 0.6337130069732666, "loss_ce": 0.00016809771477710456, "loss_iou": 0.20703125, "loss_num": 0.0439453125, "loss_xval": 0.6328125, "num_input_tokens_seen": 80002848, "step": 1277 }, { "epoch": 4.252911813643927, "grad_norm": 13.609673500061035, "learning_rate": 5e-06, "loss": 0.8332, "num_input_tokens_seen": 80065168, "step": 1278 }, { "epoch": 4.252911813643927, "loss": 0.688213586807251, "loss_ce": 0.00022533058654516935, "loss_iou": 0.22265625, "loss_num": 0.04833984375, "loss_xval": 0.6875, "num_input_tokens_seen": 80065168, "step": 1278 }, { "epoch": 4.256239600665557, "grad_norm": 13.89177417755127, "learning_rate": 5e-06, "loss": 0.7432, "num_input_tokens_seen": 80126552, "step": 1279 }, { "epoch": 4.256239600665557, "loss": 0.7881263494491577, "loss_ce": 4.041851207148284e-05, "loss_iou": 0.2578125, "loss_num": 0.0546875, "loss_xval": 0.7890625, "num_input_tokens_seen": 80126552, "step": 1279 }, { "epoch": 4.259567387687188, "grad_norm": 9.584406852722168, "learning_rate": 5e-06, "loss": 0.7185, "num_input_tokens_seen": 80189328, "step": 1280 }, { "epoch": 4.259567387687188, "loss": 0.6353198289871216, "loss_ce": 0.0009204413508996367, "loss_iou": 0.2333984375, "loss_num": 0.033447265625, "loss_xval": 0.6328125, "num_input_tokens_seen": 80189328, "step": 1280 }, { "epoch": 4.262895174708818, "grad_norm": 8.969870567321777, "learning_rate": 5e-06, "loss": 0.783, "num_input_tokens_seen": 80252808, "step": 1281 }, { "epoch": 4.262895174708818, "loss": 0.9145519137382507, "loss_ce": 0.0019542670343071222, "loss_iou": 0.32421875, "loss_num": 0.052734375, "loss_xval": 0.9140625, "num_input_tokens_seen": 80252808, "step": 1281 }, { "epoch": 4.266222961730449, "grad_norm": 19.581363677978516, "learning_rate": 5e-06, "loss": 0.5597, "num_input_tokens_seen": 80315084, "step": 1282 }, { "epoch": 4.266222961730449, "loss": 0.49742501974105835, "loss_ce": 1.9012808479601517e-05, "loss_iou": 0.0751953125, "loss_num": 0.0693359375, "loss_xval": 0.498046875, "num_input_tokens_seen": 80315084, "step": 1282 }, { "epoch": 4.26955074875208, "grad_norm": 18.102149963378906, "learning_rate": 5e-06, "loss": 0.6311, "num_input_tokens_seen": 80378400, "step": 1283 }, { "epoch": 4.26955074875208, "loss": 0.47788214683532715, "loss_ce": 9.894504910334945e-05, "loss_iou": 0.166015625, "loss_num": 0.0289306640625, "loss_xval": 0.478515625, "num_input_tokens_seen": 80378400, "step": 1283 }, { "epoch": 4.27287853577371, "grad_norm": 14.705218315124512, "learning_rate": 5e-06, "loss": 0.7301, "num_input_tokens_seen": 80440412, "step": 1284 }, { "epoch": 4.27287853577371, "loss": 0.6811676025390625, "loss_ce": 1.5225450624711812e-05, "loss_iou": 0.248046875, "loss_num": 0.036865234375, "loss_xval": 0.6796875, "num_input_tokens_seen": 80440412, "step": 1284 }, { "epoch": 4.276206322795341, "grad_norm": 11.81163501739502, "learning_rate": 5e-06, "loss": 0.7499, "num_input_tokens_seen": 80503416, "step": 1285 }, { "epoch": 4.276206322795341, "loss": 0.718535304069519, "loss_ce": 2.9458708013407886e-05, "loss_iou": 0.28515625, "loss_num": 0.0294189453125, "loss_xval": 0.71875, "num_input_tokens_seen": 80503416, "step": 1285 }, { "epoch": 4.2795341098169715, "grad_norm": 11.332803726196289, "learning_rate": 5e-06, "loss": 0.6372, "num_input_tokens_seen": 80566256, "step": 1286 }, { "epoch": 4.2795341098169715, "loss": 0.6951990127563477, "loss_ce": 0.0007410263060592115, "loss_iou": 0.248046875, "loss_num": 0.039794921875, "loss_xval": 0.6953125, "num_input_tokens_seen": 80566256, "step": 1286 }, { "epoch": 4.282861896838602, "grad_norm": 12.372754096984863, "learning_rate": 5e-06, "loss": 0.6649, "num_input_tokens_seen": 80628540, "step": 1287 }, { "epoch": 4.282861896838602, "loss": 0.8295961618423462, "loss_ce": 0.0007388241938315332, "loss_iou": 0.318359375, "loss_num": 0.0380859375, "loss_xval": 0.828125, "num_input_tokens_seen": 80628540, "step": 1287 }, { "epoch": 4.286189683860233, "grad_norm": 7.756584167480469, "learning_rate": 5e-06, "loss": 0.6643, "num_input_tokens_seen": 80690812, "step": 1288 }, { "epoch": 4.286189683860233, "loss": 0.6631507277488708, "loss_ce": 0.001529662637040019, "loss_iou": 0.2333984375, "loss_num": 0.038818359375, "loss_xval": 0.66015625, "num_input_tokens_seen": 80690812, "step": 1288 }, { "epoch": 4.289517470881863, "grad_norm": 12.011344909667969, "learning_rate": 5e-06, "loss": 0.8091, "num_input_tokens_seen": 80754852, "step": 1289 }, { "epoch": 4.289517470881863, "loss": 0.5517739057540894, "loss_ce": 0.00026023387908935547, "loss_iou": 0.201171875, "loss_num": 0.030029296875, "loss_xval": 0.55078125, "num_input_tokens_seen": 80754852, "step": 1289 }, { "epoch": 4.292845257903494, "grad_norm": 14.856667518615723, "learning_rate": 5e-06, "loss": 0.7544, "num_input_tokens_seen": 80816168, "step": 1290 }, { "epoch": 4.292845257903494, "loss": 0.5227793455123901, "loss_ce": 0.000226860967813991, "loss_iou": 0.16015625, "loss_num": 0.040283203125, "loss_xval": 0.5234375, "num_input_tokens_seen": 80816168, "step": 1290 }, { "epoch": 4.2961730449251245, "grad_norm": 10.020024299621582, "learning_rate": 5e-06, "loss": 0.7188, "num_input_tokens_seen": 80880640, "step": 1291 }, { "epoch": 4.2961730449251245, "loss": 0.760665237903595, "loss_ce": 0.0022423923946917057, "loss_iou": 0.26953125, "loss_num": 0.04345703125, "loss_xval": 0.7578125, "num_input_tokens_seen": 80880640, "step": 1291 }, { "epoch": 4.299500831946755, "grad_norm": 11.497454643249512, "learning_rate": 5e-06, "loss": 0.5628, "num_input_tokens_seen": 80943176, "step": 1292 }, { "epoch": 4.299500831946755, "loss": 0.687080979347229, "loss_ce": 6.930784729775041e-05, "loss_iou": 0.2373046875, "loss_num": 0.042724609375, "loss_xval": 0.6875, "num_input_tokens_seen": 80943176, "step": 1292 }, { "epoch": 4.302828618968386, "grad_norm": 12.758271217346191, "learning_rate": 5e-06, "loss": 0.8416, "num_input_tokens_seen": 81007208, "step": 1293 }, { "epoch": 4.302828618968386, "loss": 0.7446075677871704, "loss_ce": 0.0004669097252190113, "loss_iou": 0.275390625, "loss_num": 0.038818359375, "loss_xval": 0.7421875, "num_input_tokens_seen": 81007208, "step": 1293 }, { "epoch": 4.306156405990016, "grad_norm": 52.66012954711914, "learning_rate": 5e-06, "loss": 0.5894, "num_input_tokens_seen": 81067976, "step": 1294 }, { "epoch": 4.306156405990016, "loss": 0.37765905261039734, "loss_ce": 0.0007974714390002191, "loss_iou": 0.10693359375, "loss_num": 0.03271484375, "loss_xval": 0.376953125, "num_input_tokens_seen": 81067976, "step": 1294 }, { "epoch": 4.309484193011647, "grad_norm": 12.44371509552002, "learning_rate": 5e-06, "loss": 0.5179, "num_input_tokens_seen": 81128904, "step": 1295 }, { "epoch": 4.309484193011647, "loss": 0.6360405683517456, "loss_ce": 5.427756696008146e-05, "loss_iou": 0.2412109375, "loss_num": 0.0306396484375, "loss_xval": 0.63671875, "num_input_tokens_seen": 81128904, "step": 1295 }, { "epoch": 4.312811980033278, "grad_norm": 24.886842727661133, "learning_rate": 5e-06, "loss": 0.705, "num_input_tokens_seen": 81191592, "step": 1296 }, { "epoch": 4.312811980033278, "loss": 0.7338913083076477, "loss_ce": 4.5570727706945036e-06, "loss_iou": 0.2890625, "loss_num": 0.03076171875, "loss_xval": 0.734375, "num_input_tokens_seen": 81191592, "step": 1296 }, { "epoch": 4.316139767054908, "grad_norm": 30.665220260620117, "learning_rate": 5e-06, "loss": 0.7969, "num_input_tokens_seen": 81255356, "step": 1297 }, { "epoch": 4.316139767054908, "loss": 0.7743918895721436, "loss_ce": 0.001564753009006381, "loss_iou": 0.25390625, "loss_num": 0.05322265625, "loss_xval": 0.7734375, "num_input_tokens_seen": 81255356, "step": 1297 }, { "epoch": 4.319467554076539, "grad_norm": 29.902305603027344, "learning_rate": 5e-06, "loss": 0.8438, "num_input_tokens_seen": 81318216, "step": 1298 }, { "epoch": 4.319467554076539, "loss": 0.7068020105361938, "loss_ce": 1.4880570233799517e-05, "loss_iou": 0.251953125, "loss_num": 0.040283203125, "loss_xval": 0.70703125, "num_input_tokens_seen": 81318216, "step": 1298 }, { "epoch": 4.322795341098169, "grad_norm": 13.050071716308594, "learning_rate": 5e-06, "loss": 0.6442, "num_input_tokens_seen": 81381692, "step": 1299 }, { "epoch": 4.322795341098169, "loss": 0.3645215630531311, "loss_ce": 1.962293026736006e-05, "loss_iou": 0.1298828125, "loss_num": 0.02099609375, "loss_xval": 0.365234375, "num_input_tokens_seen": 81381692, "step": 1299 }, { "epoch": 4.3261231281198, "grad_norm": 14.20059871673584, "learning_rate": 5e-06, "loss": 0.6968, "num_input_tokens_seen": 81445260, "step": 1300 }, { "epoch": 4.3261231281198, "loss": 0.9003438353538513, "loss_ce": 0.0004414235008880496, "loss_iou": 0.314453125, "loss_num": 0.0546875, "loss_xval": 0.8984375, "num_input_tokens_seen": 81445260, "step": 1300 }, { "epoch": 4.329450915141431, "grad_norm": 10.232033729553223, "learning_rate": 5e-06, "loss": 0.6684, "num_input_tokens_seen": 81507220, "step": 1301 }, { "epoch": 4.329450915141431, "loss": 0.922446608543396, "loss_ce": 0.00044959314982406795, "loss_iou": 0.30078125, "loss_num": 0.06396484375, "loss_xval": 0.921875, "num_input_tokens_seen": 81507220, "step": 1301 }, { "epoch": 4.332778702163061, "grad_norm": 8.437857627868652, "learning_rate": 5e-06, "loss": 0.5038, "num_input_tokens_seen": 81570812, "step": 1302 }, { "epoch": 4.332778702163061, "loss": 0.6514133214950562, "loss_ce": 4.610713222064078e-05, "loss_iou": 0.2314453125, "loss_num": 0.037353515625, "loss_xval": 0.65234375, "num_input_tokens_seen": 81570812, "step": 1302 }, { "epoch": 4.336106489184692, "grad_norm": 9.5712308883667, "learning_rate": 5e-06, "loss": 0.9257, "num_input_tokens_seen": 81634604, "step": 1303 }, { "epoch": 4.336106489184692, "loss": 1.1645619869232178, "loss_ce": 0.0007436010637320578, "loss_iou": 0.376953125, "loss_num": 0.08203125, "loss_xval": 1.1640625, "num_input_tokens_seen": 81634604, "step": 1303 }, { "epoch": 4.3394342762063225, "grad_norm": 24.645009994506836, "learning_rate": 5e-06, "loss": 0.8503, "num_input_tokens_seen": 81698128, "step": 1304 }, { "epoch": 4.3394342762063225, "loss": 0.7591370940208435, "loss_ce": 0.0015687549021095037, "loss_iou": 0.244140625, "loss_num": 0.0537109375, "loss_xval": 0.7578125, "num_input_tokens_seen": 81698128, "step": 1304 }, { "epoch": 4.342762063227953, "grad_norm": 18.755535125732422, "learning_rate": 5e-06, "loss": 0.8487, "num_input_tokens_seen": 81760788, "step": 1305 }, { "epoch": 4.342762063227953, "loss": 0.7894872426986694, "loss_ce": 0.001157181104645133, "loss_iou": 0.248046875, "loss_num": 0.05859375, "loss_xval": 0.7890625, "num_input_tokens_seen": 81760788, "step": 1305 }, { "epoch": 4.346089850249584, "grad_norm": 11.36084270477295, "learning_rate": 5e-06, "loss": 0.8839, "num_input_tokens_seen": 81823160, "step": 1306 }, { "epoch": 4.346089850249584, "loss": 0.6701819896697998, "loss_ce": 1.5993844499462284e-05, "loss_iou": 0.2578125, "loss_num": 0.0303955078125, "loss_xval": 0.671875, "num_input_tokens_seen": 81823160, "step": 1306 }, { "epoch": 4.349417637271214, "grad_norm": 14.648335456848145, "learning_rate": 5e-06, "loss": 0.96, "num_input_tokens_seen": 81885560, "step": 1307 }, { "epoch": 4.349417637271214, "loss": 1.0643436908721924, "loss_ce": 0.001599527895450592, "loss_iou": 0.322265625, "loss_num": 0.083984375, "loss_xval": 1.0625, "num_input_tokens_seen": 81885560, "step": 1307 }, { "epoch": 4.352745424292845, "grad_norm": 15.327980995178223, "learning_rate": 5e-06, "loss": 0.5124, "num_input_tokens_seen": 81947560, "step": 1308 }, { "epoch": 4.352745424292845, "loss": 0.28150349855422974, "loss_ce": 0.0004366114444565028, "loss_iou": 0.04931640625, "loss_num": 0.036376953125, "loss_xval": 0.28125, "num_input_tokens_seen": 81947560, "step": 1308 }, { "epoch": 4.356073211314476, "grad_norm": 12.198101043701172, "learning_rate": 5e-06, "loss": 0.6174, "num_input_tokens_seen": 82008732, "step": 1309 }, { "epoch": 4.356073211314476, "loss": 0.6670141816139221, "loss_ce": 0.001486798282712698, "loss_iou": 0.2197265625, "loss_num": 0.045166015625, "loss_xval": 0.6640625, "num_input_tokens_seen": 82008732, "step": 1309 }, { "epoch": 4.359400998336106, "grad_norm": 12.514129638671875, "learning_rate": 5e-06, "loss": 0.6429, "num_input_tokens_seen": 82071748, "step": 1310 }, { "epoch": 4.359400998336106, "loss": 0.5801935195922852, "loss_ce": 5.4305866797221825e-05, "loss_iou": 0.1875, "loss_num": 0.041015625, "loss_xval": 0.58203125, "num_input_tokens_seen": 82071748, "step": 1310 }, { "epoch": 4.362728785357737, "grad_norm": 12.292180061340332, "learning_rate": 5e-06, "loss": 0.7611, "num_input_tokens_seen": 82135220, "step": 1311 }, { "epoch": 4.362728785357737, "loss": 0.6303189992904663, "loss_ce": 0.00043624168029055, "loss_iou": 0.26953125, "loss_num": 0.018310546875, "loss_xval": 0.62890625, "num_input_tokens_seen": 82135220, "step": 1311 }, { "epoch": 4.366056572379367, "grad_norm": 19.755245208740234, "learning_rate": 5e-06, "loss": 0.9127, "num_input_tokens_seen": 82198120, "step": 1312 }, { "epoch": 4.366056572379367, "loss": 0.985588788986206, "loss_ce": 0.0004814087296836078, "loss_iou": 0.36328125, "loss_num": 0.052001953125, "loss_xval": 0.984375, "num_input_tokens_seen": 82198120, "step": 1312 }, { "epoch": 4.369384359400998, "grad_norm": 16.86263656616211, "learning_rate": 5e-06, "loss": 0.6468, "num_input_tokens_seen": 82260076, "step": 1313 }, { "epoch": 4.369384359400998, "loss": 0.682517409324646, "loss_ce": 0.000388490705518052, "loss_iou": 0.224609375, "loss_num": 0.046630859375, "loss_xval": 0.68359375, "num_input_tokens_seen": 82260076, "step": 1313 }, { "epoch": 4.372712146422629, "grad_norm": 9.756426811218262, "learning_rate": 5e-06, "loss": 0.9654, "num_input_tokens_seen": 82322316, "step": 1314 }, { "epoch": 4.372712146422629, "loss": 0.9413772821426392, "loss_ce": 0.00021517441200558096, "loss_iou": 0.318359375, "loss_num": 0.060546875, "loss_xval": 0.94140625, "num_input_tokens_seen": 82322316, "step": 1314 }, { "epoch": 4.376039933444259, "grad_norm": 11.446276664733887, "learning_rate": 5e-06, "loss": 0.6246, "num_input_tokens_seen": 82386100, "step": 1315 }, { "epoch": 4.376039933444259, "loss": 0.7032071352005005, "loss_ce": 0.0006314606289379299, "loss_iou": 0.267578125, "loss_num": 0.033203125, "loss_xval": 0.703125, "num_input_tokens_seen": 82386100, "step": 1315 }, { "epoch": 4.37936772046589, "grad_norm": 11.805602073669434, "learning_rate": 5e-06, "loss": 0.8133, "num_input_tokens_seen": 82448276, "step": 1316 }, { "epoch": 4.37936772046589, "loss": 0.5263853669166565, "loss_ce": 1.8209135305369273e-05, "loss_iou": 0.1376953125, "loss_num": 0.05029296875, "loss_xval": 0.52734375, "num_input_tokens_seen": 82448276, "step": 1316 }, { "epoch": 4.3826955074875205, "grad_norm": 14.017489433288574, "learning_rate": 5e-06, "loss": 0.7262, "num_input_tokens_seen": 82509572, "step": 1317 }, { "epoch": 4.3826955074875205, "loss": 0.7585628032684326, "loss_ce": 0.0005061531555838883, "loss_iou": 0.2138671875, "loss_num": 0.06640625, "loss_xval": 0.7578125, "num_input_tokens_seen": 82509572, "step": 1317 }, { "epoch": 4.386023294509151, "grad_norm": 9.340023040771484, "learning_rate": 5e-06, "loss": 0.9733, "num_input_tokens_seen": 82572988, "step": 1318 }, { "epoch": 4.386023294509151, "loss": 0.9580419063568115, "loss_ce": 0.00015617434110026807, "loss_iou": 0.294921875, "loss_num": 0.07373046875, "loss_xval": 0.95703125, "num_input_tokens_seen": 82572988, "step": 1318 }, { "epoch": 4.389351081530782, "grad_norm": 23.777090072631836, "learning_rate": 5e-06, "loss": 0.6908, "num_input_tokens_seen": 82635976, "step": 1319 }, { "epoch": 4.389351081530782, "loss": 0.8914474248886108, "loss_ce": 0.0010665201116353273, "loss_iou": 0.34375, "loss_num": 0.040771484375, "loss_xval": 0.890625, "num_input_tokens_seen": 82635976, "step": 1319 }, { "epoch": 4.392678868552412, "grad_norm": 22.563859939575195, "learning_rate": 5e-06, "loss": 0.6377, "num_input_tokens_seen": 82700040, "step": 1320 }, { "epoch": 4.392678868552412, "loss": 0.6494787931442261, "loss_ce": 0.002262030728161335, "loss_iou": 0.201171875, "loss_num": 0.048828125, "loss_xval": 0.6484375, "num_input_tokens_seen": 82700040, "step": 1320 }, { "epoch": 4.396006655574043, "grad_norm": 16.087520599365234, "learning_rate": 5e-06, "loss": 0.677, "num_input_tokens_seen": 82763308, "step": 1321 }, { "epoch": 4.396006655574043, "loss": 0.8195084929466248, "loss_ce": 0.0006608610274270177, "loss_iou": 0.275390625, "loss_num": 0.0537109375, "loss_xval": 0.8203125, "num_input_tokens_seen": 82763308, "step": 1321 }, { "epoch": 4.3993344425956735, "grad_norm": 15.090987205505371, "learning_rate": 5e-06, "loss": 0.6979, "num_input_tokens_seen": 82826744, "step": 1322 }, { "epoch": 4.3993344425956735, "loss": 0.4208059310913086, "loss_ce": 2.9584578442154452e-05, "loss_iou": 0.1337890625, "loss_num": 0.030517578125, "loss_xval": 0.419921875, "num_input_tokens_seen": 82826744, "step": 1322 }, { "epoch": 4.402662229617304, "grad_norm": 14.373215675354004, "learning_rate": 5e-06, "loss": 0.7911, "num_input_tokens_seen": 82889604, "step": 1323 }, { "epoch": 4.402662229617304, "loss": 0.6858798265457153, "loss_ce": 0.0013094794703647494, "loss_iou": 0.279296875, "loss_num": 0.025390625, "loss_xval": 0.68359375, "num_input_tokens_seen": 82889604, "step": 1323 }, { "epoch": 4.405990016638935, "grad_norm": 29.24193572998047, "learning_rate": 5e-06, "loss": 0.9223, "num_input_tokens_seen": 82952864, "step": 1324 }, { "epoch": 4.405990016638935, "loss": 1.03691828250885, "loss_ce": 5.295296432450414e-05, "loss_iou": 0.408203125, "loss_num": 0.04443359375, "loss_xval": 1.0390625, "num_input_tokens_seen": 82952864, "step": 1324 }, { "epoch": 4.409317803660565, "grad_norm": 14.150858879089355, "learning_rate": 5e-06, "loss": 0.7532, "num_input_tokens_seen": 83015712, "step": 1325 }, { "epoch": 4.409317803660565, "loss": 0.8280280232429504, "loss_ce": 0.00020820634381379932, "loss_iou": 0.322265625, "loss_num": 0.037109375, "loss_xval": 0.828125, "num_input_tokens_seen": 83015712, "step": 1325 }, { "epoch": 4.412645590682196, "grad_norm": 29.858686447143555, "learning_rate": 5e-06, "loss": 0.6685, "num_input_tokens_seen": 83078676, "step": 1326 }, { "epoch": 4.412645590682196, "loss": 0.7052803039550781, "loss_ce": 0.00044628471368923783, "loss_iou": 0.25390625, "loss_num": 0.039794921875, "loss_xval": 0.703125, "num_input_tokens_seen": 83078676, "step": 1326 }, { "epoch": 4.415973377703827, "grad_norm": 15.250243186950684, "learning_rate": 5e-06, "loss": 0.6323, "num_input_tokens_seen": 83141596, "step": 1327 }, { "epoch": 4.415973377703827, "loss": 0.8748631477355957, "loss_ce": 0.00022933242144063115, "loss_iou": 0.30078125, "loss_num": 0.054931640625, "loss_xval": 0.875, "num_input_tokens_seen": 83141596, "step": 1327 }, { "epoch": 4.419301164725457, "grad_norm": 17.645246505737305, "learning_rate": 5e-06, "loss": 0.7055, "num_input_tokens_seen": 83202652, "step": 1328 }, { "epoch": 4.419301164725457, "loss": 0.8196154832839966, "loss_ce": 3.535650466801599e-05, "loss_iou": 0.2421875, "loss_num": 0.06689453125, "loss_xval": 0.8203125, "num_input_tokens_seen": 83202652, "step": 1328 }, { "epoch": 4.422628951747088, "grad_norm": 10.930619239807129, "learning_rate": 5e-06, "loss": 0.5952, "num_input_tokens_seen": 83266684, "step": 1329 }, { "epoch": 4.422628951747088, "loss": 0.5401097536087036, "loss_ce": 7.067422848194838e-05, "loss_iou": 0.1904296875, "loss_num": 0.031982421875, "loss_xval": 0.5390625, "num_input_tokens_seen": 83266684, "step": 1329 }, { "epoch": 4.425956738768718, "grad_norm": 21.14279556274414, "learning_rate": 5e-06, "loss": 0.7999, "num_input_tokens_seen": 83330424, "step": 1330 }, { "epoch": 4.425956738768718, "loss": 0.5837583541870117, "loss_ce": 1.807482840376906e-05, "loss_iou": 0.1982421875, "loss_num": 0.037353515625, "loss_xval": 0.58203125, "num_input_tokens_seen": 83330424, "step": 1330 }, { "epoch": 4.429284525790349, "grad_norm": 18.78822135925293, "learning_rate": 5e-06, "loss": 0.7177, "num_input_tokens_seen": 83394464, "step": 1331 }, { "epoch": 4.429284525790349, "loss": 0.7250956296920776, "loss_ce": 0.00024215054872911423, "loss_iou": 0.232421875, "loss_num": 0.052001953125, "loss_xval": 0.7265625, "num_input_tokens_seen": 83394464, "step": 1331 }, { "epoch": 4.43261231281198, "grad_norm": 17.859045028686523, "learning_rate": 5e-06, "loss": 0.5746, "num_input_tokens_seen": 83456320, "step": 1332 }, { "epoch": 4.43261231281198, "loss": 0.5926786065101624, "loss_ce": 2.7240082999924198e-05, "loss_iou": 0.193359375, "loss_num": 0.04150390625, "loss_xval": 0.59375, "num_input_tokens_seen": 83456320, "step": 1332 }, { "epoch": 4.43594009983361, "grad_norm": 28.78057861328125, "learning_rate": 5e-06, "loss": 0.725, "num_input_tokens_seen": 83519880, "step": 1333 }, { "epoch": 4.43594009983361, "loss": 0.6718736886978149, "loss_ce": 0.0007310921791940928, "loss_iou": 0.23828125, "loss_num": 0.038818359375, "loss_xval": 0.671875, "num_input_tokens_seen": 83519880, "step": 1333 }, { "epoch": 4.439267886855241, "grad_norm": 13.343917846679688, "learning_rate": 5e-06, "loss": 0.6955, "num_input_tokens_seen": 83581528, "step": 1334 }, { "epoch": 4.439267886855241, "loss": 0.6046224236488342, "loss_ce": 0.00013025110820308328, "loss_iou": 0.197265625, "loss_num": 0.041748046875, "loss_xval": 0.60546875, "num_input_tokens_seen": 83581528, "step": 1334 }, { "epoch": 4.4425956738768715, "grad_norm": 24.18421745300293, "learning_rate": 5e-06, "loss": 0.9336, "num_input_tokens_seen": 83645572, "step": 1335 }, { "epoch": 4.4425956738768715, "loss": 0.9804202318191528, "loss_ce": 0.00019555243488866836, "loss_iou": 0.37109375, "loss_num": 0.048095703125, "loss_xval": 0.98046875, "num_input_tokens_seen": 83645572, "step": 1335 }, { "epoch": 4.445923460898502, "grad_norm": 19.95655632019043, "learning_rate": 5e-06, "loss": 0.6435, "num_input_tokens_seen": 83708536, "step": 1336 }, { "epoch": 4.445923460898502, "loss": 0.7947807312011719, "loss_ce": 0.00046916649444028735, "loss_iou": 0.29296875, "loss_num": 0.0419921875, "loss_xval": 0.79296875, "num_input_tokens_seen": 83708536, "step": 1336 }, { "epoch": 4.449251247920133, "grad_norm": 13.971914291381836, "learning_rate": 5e-06, "loss": 0.6339, "num_input_tokens_seen": 83771940, "step": 1337 }, { "epoch": 4.449251247920133, "loss": 0.627832293510437, "loss_ce": 2.4645694793434814e-05, "loss_iou": 0.21875, "loss_num": 0.03759765625, "loss_xval": 0.62890625, "num_input_tokens_seen": 83771940, "step": 1337 }, { "epoch": 4.452579034941763, "grad_norm": 28.279212951660156, "learning_rate": 5e-06, "loss": 0.5821, "num_input_tokens_seen": 83833204, "step": 1338 }, { "epoch": 4.452579034941763, "loss": 0.6125773191452026, "loss_ce": 0.00015060522127896547, "loss_iou": 0.126953125, "loss_num": 0.07177734375, "loss_xval": 0.61328125, "num_input_tokens_seen": 83833204, "step": 1338 }, { "epoch": 4.455906821963394, "grad_norm": 24.268396377563477, "learning_rate": 5e-06, "loss": 0.82, "num_input_tokens_seen": 83895096, "step": 1339 }, { "epoch": 4.455906821963394, "loss": 0.8381412029266357, "loss_ce": 6.506794306915253e-06, "loss_iou": 0.265625, "loss_num": 0.0615234375, "loss_xval": 0.83984375, "num_input_tokens_seen": 83895096, "step": 1339 }, { "epoch": 4.4592346089850246, "grad_norm": 8.350409507751465, "learning_rate": 5e-06, "loss": 0.8535, "num_input_tokens_seen": 83957376, "step": 1340 }, { "epoch": 4.4592346089850246, "loss": 1.058552861213684, "loss_ce": 0.0008135715615935624, "loss_iou": 0.376953125, "loss_num": 0.060546875, "loss_xval": 1.0546875, "num_input_tokens_seen": 83957376, "step": 1340 }, { "epoch": 4.462562396006655, "grad_norm": 17.044376373291016, "learning_rate": 5e-06, "loss": 0.767, "num_input_tokens_seen": 84020204, "step": 1341 }, { "epoch": 4.462562396006655, "loss": 0.725034236907959, "loss_ce": 0.0011573644587770104, "loss_iou": 0.2392578125, "loss_num": 0.04931640625, "loss_xval": 0.72265625, "num_input_tokens_seen": 84020204, "step": 1341 }, { "epoch": 4.465890183028286, "grad_norm": 11.960426330566406, "learning_rate": 5e-06, "loss": 0.9019, "num_input_tokens_seen": 84085084, "step": 1342 }, { "epoch": 4.465890183028286, "loss": 0.8089953660964966, "loss_ce": 0.0013781640445813537, "loss_iou": 0.263671875, "loss_num": 0.05615234375, "loss_xval": 0.80859375, "num_input_tokens_seen": 84085084, "step": 1342 }, { "epoch": 4.469217970049916, "grad_norm": 13.375765800476074, "learning_rate": 5e-06, "loss": 0.6083, "num_input_tokens_seen": 84147448, "step": 1343 }, { "epoch": 4.469217970049916, "loss": 0.7444367408752441, "loss_ce": 0.0001740536536090076, "loss_iou": 0.267578125, "loss_num": 0.04150390625, "loss_xval": 0.74609375, "num_input_tokens_seen": 84147448, "step": 1343 }, { "epoch": 4.472545757071547, "grad_norm": 24.798673629760742, "learning_rate": 5e-06, "loss": 1.0596, "num_input_tokens_seen": 84211792, "step": 1344 }, { "epoch": 4.472545757071547, "loss": 1.0900293588638306, "loss_ce": 0.001162198605015874, "loss_iou": 0.3671875, "loss_num": 0.07080078125, "loss_xval": 1.0859375, "num_input_tokens_seen": 84211792, "step": 1344 }, { "epoch": 4.475873544093178, "grad_norm": 46.3792610168457, "learning_rate": 5e-06, "loss": 0.7852, "num_input_tokens_seen": 84274836, "step": 1345 }, { "epoch": 4.475873544093178, "loss": 0.5791232585906982, "loss_ce": 2.174517430830747e-05, "loss_iou": 0.216796875, "loss_num": 0.029052734375, "loss_xval": 0.578125, "num_input_tokens_seen": 84274836, "step": 1345 }, { "epoch": 4.479201331114808, "grad_norm": 19.27472496032715, "learning_rate": 5e-06, "loss": 0.5985, "num_input_tokens_seen": 84338324, "step": 1346 }, { "epoch": 4.479201331114808, "loss": 0.7217234373092651, "loss_ce": 0.0005319793708622456, "loss_iou": 0.28515625, "loss_num": 0.0299072265625, "loss_xval": 0.72265625, "num_input_tokens_seen": 84338324, "step": 1346 }, { "epoch": 4.482529118136439, "grad_norm": 20.793636322021484, "learning_rate": 5e-06, "loss": 0.7234, "num_input_tokens_seen": 84400104, "step": 1347 }, { "epoch": 4.482529118136439, "loss": 0.6533622741699219, "loss_ce": 0.00040814554085955024, "loss_iou": 0.2060546875, "loss_num": 0.048095703125, "loss_xval": 0.65234375, "num_input_tokens_seen": 84400104, "step": 1347 }, { "epoch": 4.4858569051580695, "grad_norm": 12.040908813476562, "learning_rate": 5e-06, "loss": 0.815, "num_input_tokens_seen": 84462732, "step": 1348 }, { "epoch": 4.4858569051580695, "loss": 0.7836884260177612, "loss_ce": 0.00048530942876823246, "loss_iou": 0.265625, "loss_num": 0.05029296875, "loss_xval": 0.78125, "num_input_tokens_seen": 84462732, "step": 1348 }, { "epoch": 4.4891846921797, "grad_norm": 18.848194122314453, "learning_rate": 5e-06, "loss": 0.8167, "num_input_tokens_seen": 84526312, "step": 1349 }, { "epoch": 4.4891846921797, "loss": 0.8186991214752197, "loss_ce": 0.0005839316290803254, "loss_iou": 0.267578125, "loss_num": 0.056640625, "loss_xval": 0.81640625, "num_input_tokens_seen": 84526312, "step": 1349 }, { "epoch": 4.492512479201331, "grad_norm": 16.940589904785156, "learning_rate": 5e-06, "loss": 0.8814, "num_input_tokens_seen": 84589304, "step": 1350 }, { "epoch": 4.492512479201331, "loss": 0.7323105335235596, "loss_ce": 0.0004990027518942952, "loss_iou": 0.244140625, "loss_num": 0.048583984375, "loss_xval": 0.73046875, "num_input_tokens_seen": 84589304, "step": 1350 }, { "epoch": 4.495840266222961, "grad_norm": 12.424930572509766, "learning_rate": 5e-06, "loss": 0.6786, "num_input_tokens_seen": 84653056, "step": 1351 }, { "epoch": 4.495840266222961, "loss": 0.594731330871582, "loss_ce": 4.724020072899293e-06, "loss_iou": 0.2177734375, "loss_num": 0.03173828125, "loss_xval": 0.59375, "num_input_tokens_seen": 84653056, "step": 1351 }, { "epoch": 4.499168053244592, "grad_norm": 8.971871376037598, "learning_rate": 5e-06, "loss": 0.719, "num_input_tokens_seen": 84715604, "step": 1352 }, { "epoch": 4.499168053244592, "loss": 0.7548971176147461, "loss_ce": 0.00025852146791294217, "loss_iou": 0.251953125, "loss_num": 0.0498046875, "loss_xval": 0.75390625, "num_input_tokens_seen": 84715604, "step": 1352 }, { "epoch": 4.5024958402662225, "grad_norm": 25.51188087463379, "learning_rate": 5e-06, "loss": 0.5393, "num_input_tokens_seen": 84778444, "step": 1353 }, { "epoch": 4.5024958402662225, "loss": 0.4582330584526062, "loss_ce": 4.212010753690265e-05, "loss_iou": 0.146484375, "loss_num": 0.033203125, "loss_xval": 0.458984375, "num_input_tokens_seen": 84778444, "step": 1353 }, { "epoch": 4.505823627287853, "grad_norm": 28.863351821899414, "learning_rate": 5e-06, "loss": 0.586, "num_input_tokens_seen": 84841420, "step": 1354 }, { "epoch": 4.505823627287853, "loss": 0.7277034521102905, "loss_ce": 0.0015376773662865162, "loss_iou": 0.2451171875, "loss_num": 0.04736328125, "loss_xval": 0.7265625, "num_input_tokens_seen": 84841420, "step": 1354 }, { "epoch": 4.509151414309484, "grad_norm": 11.419045448303223, "learning_rate": 5e-06, "loss": 0.9489, "num_input_tokens_seen": 84905164, "step": 1355 }, { "epoch": 4.509151414309484, "loss": 0.9230769872665405, "loss_ce": 0.00022541567159350961, "loss_iou": 0.318359375, "loss_num": 0.05712890625, "loss_xval": 0.921875, "num_input_tokens_seen": 84905164, "step": 1355 }, { "epoch": 4.512479201331114, "grad_norm": 8.96274471282959, "learning_rate": 5e-06, "loss": 0.7418, "num_input_tokens_seen": 84969012, "step": 1356 }, { "epoch": 4.512479201331114, "loss": 0.7852834463119507, "loss_ce": 0.0008596146362833679, "loss_iou": 0.25, "loss_num": 0.056640625, "loss_xval": 0.78515625, "num_input_tokens_seen": 84969012, "step": 1356 }, { "epoch": 4.515806988352745, "grad_norm": 8.02625560760498, "learning_rate": 5e-06, "loss": 0.6062, "num_input_tokens_seen": 85031800, "step": 1357 }, { "epoch": 4.515806988352745, "loss": 0.5827827453613281, "loss_ce": 1.9044146029045805e-05, "loss_iou": 0.201171875, "loss_num": 0.036376953125, "loss_xval": 0.58203125, "num_input_tokens_seen": 85031800, "step": 1357 }, { "epoch": 4.519134775374376, "grad_norm": 46.9227409362793, "learning_rate": 5e-06, "loss": 0.6798, "num_input_tokens_seen": 85095404, "step": 1358 }, { "epoch": 4.519134775374376, "loss": 0.7685559988021851, "loss_ce": 0.00012336293002590537, "loss_iou": 0.26171875, "loss_num": 0.04931640625, "loss_xval": 0.76953125, "num_input_tokens_seen": 85095404, "step": 1358 }, { "epoch": 4.522462562396006, "grad_norm": 34.97386932373047, "learning_rate": 5e-06, "loss": 0.6895, "num_input_tokens_seen": 85158752, "step": 1359 }, { "epoch": 4.522462562396006, "loss": 0.8442423343658447, "loss_ce": 4.0554509723733645e-06, "loss_iou": 0.33203125, "loss_num": 0.036376953125, "loss_xval": 0.84375, "num_input_tokens_seen": 85158752, "step": 1359 }, { "epoch": 4.525790349417637, "grad_norm": 44.28080749511719, "learning_rate": 5e-06, "loss": 1.0204, "num_input_tokens_seen": 85222508, "step": 1360 }, { "epoch": 4.525790349417637, "loss": 0.9339932203292847, "loss_ce": 3.327502417960204e-05, "loss_iou": 0.341796875, "loss_num": 0.050537109375, "loss_xval": 0.93359375, "num_input_tokens_seen": 85222508, "step": 1360 }, { "epoch": 4.529118136439267, "grad_norm": 6.506110191345215, "learning_rate": 5e-06, "loss": 0.6121, "num_input_tokens_seen": 85283528, "step": 1361 }, { "epoch": 4.529118136439267, "loss": 0.5493087768554688, "loss_ce": 0.00023652684467379004, "loss_iou": 0.1865234375, "loss_num": 0.03515625, "loss_xval": 0.55078125, "num_input_tokens_seen": 85283528, "step": 1361 }, { "epoch": 4.532445923460898, "grad_norm": 10.900887489318848, "learning_rate": 5e-06, "loss": 0.7382, "num_input_tokens_seen": 85347012, "step": 1362 }, { "epoch": 4.532445923460898, "loss": 0.7557834386825562, "loss_ce": 0.000931144692003727, "loss_iou": 0.255859375, "loss_num": 0.048583984375, "loss_xval": 0.75390625, "num_input_tokens_seen": 85347012, "step": 1362 }, { "epoch": 4.535773710482529, "grad_norm": 35.10896301269531, "learning_rate": 5e-06, "loss": 0.766, "num_input_tokens_seen": 85409800, "step": 1363 }, { "epoch": 4.535773710482529, "loss": 0.899711549282074, "loss_ce": 0.0012740622041746974, "loss_iou": 0.3671875, "loss_num": 0.032470703125, "loss_xval": 0.8984375, "num_input_tokens_seen": 85409800, "step": 1363 }, { "epoch": 4.539101497504159, "grad_norm": 34.48576354980469, "learning_rate": 5e-06, "loss": 0.8018, "num_input_tokens_seen": 85471848, "step": 1364 }, { "epoch": 4.539101497504159, "loss": 0.7342795133590698, "loss_ce": 0.0008810532744973898, "loss_iou": 0.25390625, "loss_num": 0.045166015625, "loss_xval": 0.734375, "num_input_tokens_seen": 85471848, "step": 1364 }, { "epoch": 4.54242928452579, "grad_norm": 8.720282554626465, "learning_rate": 5e-06, "loss": 0.6187, "num_input_tokens_seen": 85535028, "step": 1365 }, { "epoch": 4.54242928452579, "loss": 0.6450221538543701, "loss_ce": 2.5852832550299354e-06, "loss_iou": 0.21484375, "loss_num": 0.042724609375, "loss_xval": 0.64453125, "num_input_tokens_seen": 85535028, "step": 1365 }, { "epoch": 4.5457570715474205, "grad_norm": 31.563127517700195, "learning_rate": 5e-06, "loss": 0.6799, "num_input_tokens_seen": 85598856, "step": 1366 }, { "epoch": 4.5457570715474205, "loss": 0.5118007659912109, "loss_ce": 0.0013026673113927245, "loss_iou": 0.1318359375, "loss_num": 0.04931640625, "loss_xval": 0.51171875, "num_input_tokens_seen": 85598856, "step": 1366 }, { "epoch": 4.549084858569051, "grad_norm": 16.306110382080078, "learning_rate": 5e-06, "loss": 0.4548, "num_input_tokens_seen": 85661384, "step": 1367 }, { "epoch": 4.549084858569051, "loss": 0.43176573514938354, "loss_ce": 3.0542746571882162e-06, "loss_iou": 0.1328125, "loss_num": 0.033447265625, "loss_xval": 0.431640625, "num_input_tokens_seen": 85661384, "step": 1367 }, { "epoch": 4.552412645590682, "grad_norm": 11.156771659851074, "learning_rate": 5e-06, "loss": 0.6683, "num_input_tokens_seen": 85724720, "step": 1368 }, { "epoch": 4.552412645590682, "loss": 0.8609325289726257, "loss_ce": 0.0008250840473920107, "loss_iou": 0.33203125, "loss_num": 0.038818359375, "loss_xval": 0.859375, "num_input_tokens_seen": 85724720, "step": 1368 }, { "epoch": 4.555740432612312, "grad_norm": 11.832956314086914, "learning_rate": 5e-06, "loss": 0.4961, "num_input_tokens_seen": 85786512, "step": 1369 }, { "epoch": 4.555740432612312, "loss": 0.27058541774749756, "loss_ce": 1.6586316633038223e-05, "loss_iou": 0.0810546875, "loss_num": 0.0218505859375, "loss_xval": 0.271484375, "num_input_tokens_seen": 85786512, "step": 1369 }, { "epoch": 4.559068219633943, "grad_norm": 10.155105590820312, "learning_rate": 5e-06, "loss": 0.7228, "num_input_tokens_seen": 85849112, "step": 1370 }, { "epoch": 4.559068219633943, "loss": 0.6971813440322876, "loss_ce": 0.0015025895554572344, "loss_iou": 0.2275390625, "loss_num": 0.0478515625, "loss_xval": 0.6953125, "num_input_tokens_seen": 85849112, "step": 1370 }, { "epoch": 4.5623960066555735, "grad_norm": 13.69803524017334, "learning_rate": 5e-06, "loss": 0.6738, "num_input_tokens_seen": 85912496, "step": 1371 }, { "epoch": 4.5623960066555735, "loss": 0.6464178562164307, "loss_ce": 0.00042175239650532603, "loss_iou": 0.1982421875, "loss_num": 0.050048828125, "loss_xval": 0.64453125, "num_input_tokens_seen": 85912496, "step": 1371 }, { "epoch": 4.565723793677205, "grad_norm": 14.567577362060547, "learning_rate": 5e-06, "loss": 0.4595, "num_input_tokens_seen": 85974008, "step": 1372 }, { "epoch": 4.565723793677205, "loss": 0.5371776223182678, "loss_ce": 0.00019030201656278223, "loss_iou": 0.1796875, "loss_num": 0.035400390625, "loss_xval": 0.53515625, "num_input_tokens_seen": 85974008, "step": 1372 }, { "epoch": 4.569051580698836, "grad_norm": 11.320833206176758, "learning_rate": 5e-06, "loss": 0.4038, "num_input_tokens_seen": 86036360, "step": 1373 }, { "epoch": 4.569051580698836, "loss": 0.4081704616546631, "loss_ce": 0.00021149149688426405, "loss_iou": 0.1162109375, "loss_num": 0.03515625, "loss_xval": 0.408203125, "num_input_tokens_seen": 86036360, "step": 1373 }, { "epoch": 4.572379367720466, "grad_norm": 8.902009010314941, "learning_rate": 5e-06, "loss": 0.6014, "num_input_tokens_seen": 86098024, "step": 1374 }, { "epoch": 4.572379367720466, "loss": 0.531557559967041, "loss_ce": 2.3850705019867746e-06, "loss_iou": 0.138671875, "loss_num": 0.051025390625, "loss_xval": 0.53125, "num_input_tokens_seen": 86098024, "step": 1374 }, { "epoch": 4.575707154742097, "grad_norm": 8.554510116577148, "learning_rate": 5e-06, "loss": 0.8192, "num_input_tokens_seen": 86160932, "step": 1375 }, { "epoch": 4.575707154742097, "loss": 1.052072525024414, "loss_ce": 0.0005588348722085357, "loss_iou": 0.380859375, "loss_num": 0.057861328125, "loss_xval": 1.0546875, "num_input_tokens_seen": 86160932, "step": 1375 }, { "epoch": 4.5790349417637275, "grad_norm": 13.125633239746094, "learning_rate": 5e-06, "loss": 0.6643, "num_input_tokens_seen": 86223500, "step": 1376 }, { "epoch": 4.5790349417637275, "loss": 0.6385508179664612, "loss_ce": 0.00015362344856839627, "loss_iou": 0.23828125, "loss_num": 0.0322265625, "loss_xval": 0.63671875, "num_input_tokens_seen": 86223500, "step": 1376 }, { "epoch": 4.582362728785358, "grad_norm": 8.183598518371582, "learning_rate": 5e-06, "loss": 0.5159, "num_input_tokens_seen": 86284812, "step": 1377 }, { "epoch": 4.582362728785358, "loss": 0.6321967840194702, "loss_ce": 0.00036084238672629, "loss_iou": 0.2412109375, "loss_num": 0.0302734375, "loss_xval": 0.6328125, "num_input_tokens_seen": 86284812, "step": 1377 }, { "epoch": 4.585690515806989, "grad_norm": 12.763681411743164, "learning_rate": 5e-06, "loss": 0.6332, "num_input_tokens_seen": 86348508, "step": 1378 }, { "epoch": 4.585690515806989, "loss": 0.5611585378646851, "loss_ce": 0.00012342022091615945, "loss_iou": 0.19921875, "loss_num": 0.03271484375, "loss_xval": 0.5625, "num_input_tokens_seen": 86348508, "step": 1378 }, { "epoch": 4.589018302828619, "grad_norm": 6.641970634460449, "learning_rate": 5e-06, "loss": 0.5477, "num_input_tokens_seen": 86409256, "step": 1379 }, { "epoch": 4.589018302828619, "loss": 0.5002583265304565, "loss_ce": 1.4131117495708168e-05, "loss_iou": 0.1650390625, "loss_num": 0.033935546875, "loss_xval": 0.5, "num_input_tokens_seen": 86409256, "step": 1379 }, { "epoch": 4.59234608985025, "grad_norm": 26.70740509033203, "learning_rate": 5e-06, "loss": 0.9453, "num_input_tokens_seen": 86473068, "step": 1380 }, { "epoch": 4.59234608985025, "loss": 1.0360589027404785, "loss_ce": 4.8118923587026075e-05, "loss_iou": 0.369140625, "loss_num": 0.059814453125, "loss_xval": 1.0390625, "num_input_tokens_seen": 86473068, "step": 1380 }, { "epoch": 4.595673876871881, "grad_norm": 8.058368682861328, "learning_rate": 5e-06, "loss": 0.7622, "num_input_tokens_seen": 86535376, "step": 1381 }, { "epoch": 4.595673876871881, "loss": 0.6889675259590149, "loss_ce": 2.6555962904240005e-06, "loss_iou": 0.2421875, "loss_num": 0.041259765625, "loss_xval": 0.6875, "num_input_tokens_seen": 86535376, "step": 1381 }, { "epoch": 4.599001663893511, "grad_norm": 18.06012725830078, "learning_rate": 5e-06, "loss": 0.7193, "num_input_tokens_seen": 86598728, "step": 1382 }, { "epoch": 4.599001663893511, "loss": 0.5724185705184937, "loss_ce": 0.00015296557103283703, "loss_iou": 0.2001953125, "loss_num": 0.034423828125, "loss_xval": 0.5703125, "num_input_tokens_seen": 86598728, "step": 1382 }, { "epoch": 4.602329450915142, "grad_norm": 6.92100715637207, "learning_rate": 5e-06, "loss": 0.743, "num_input_tokens_seen": 86661740, "step": 1383 }, { "epoch": 4.602329450915142, "loss": 0.6485388875007629, "loss_ce": 0.000711729924660176, "loss_iou": 0.2001953125, "loss_num": 0.04931640625, "loss_xval": 0.6484375, "num_input_tokens_seen": 86661740, "step": 1383 }, { "epoch": 4.605657237936772, "grad_norm": 14.679880142211914, "learning_rate": 5e-06, "loss": 0.7799, "num_input_tokens_seen": 86726256, "step": 1384 }, { "epoch": 4.605657237936772, "loss": 0.7865468859672546, "loss_ce": 0.0009023505263030529, "loss_iou": 0.3125, "loss_num": 0.031982421875, "loss_xval": 0.78515625, "num_input_tokens_seen": 86726256, "step": 1384 }, { "epoch": 4.608985024958403, "grad_norm": 15.619338989257812, "learning_rate": 5e-06, "loss": 0.5165, "num_input_tokens_seen": 86787164, "step": 1385 }, { "epoch": 4.608985024958403, "loss": 0.5341886281967163, "loss_ce": 8.948968570621219e-06, "loss_iou": 0.1689453125, "loss_num": 0.0390625, "loss_xval": 0.53515625, "num_input_tokens_seen": 86787164, "step": 1385 }, { "epoch": 4.612312811980034, "grad_norm": 11.672150611877441, "learning_rate": 5e-06, "loss": 0.6922, "num_input_tokens_seen": 86849700, "step": 1386 }, { "epoch": 4.612312811980034, "loss": 0.7309243679046631, "loss_ce": 0.00045559878344647586, "loss_iou": 0.21875, "loss_num": 0.058837890625, "loss_xval": 0.73046875, "num_input_tokens_seen": 86849700, "step": 1386 }, { "epoch": 4.615640599001664, "grad_norm": 10.545302391052246, "learning_rate": 5e-06, "loss": 0.9422, "num_input_tokens_seen": 86911848, "step": 1387 }, { "epoch": 4.615640599001664, "loss": 1.0675830841064453, "loss_ce": 0.00026132259517908096, "loss_iou": 0.404296875, "loss_num": 0.0517578125, "loss_xval": 1.0703125, "num_input_tokens_seen": 86911848, "step": 1387 }, { "epoch": 4.618968386023295, "grad_norm": 18.017080307006836, "learning_rate": 5e-06, "loss": 0.6596, "num_input_tokens_seen": 86974584, "step": 1388 }, { "epoch": 4.618968386023295, "loss": 0.7650818228721619, "loss_ce": 0.0007995798951014876, "loss_iou": 0.283203125, "loss_num": 0.039794921875, "loss_xval": 0.765625, "num_input_tokens_seen": 86974584, "step": 1388 }, { "epoch": 4.6222961730449255, "grad_norm": 7.790538787841797, "learning_rate": 5e-06, "loss": 0.7499, "num_input_tokens_seen": 87035060, "step": 1389 }, { "epoch": 4.6222961730449255, "loss": 0.7779638171195984, "loss_ce": 9.695755579741672e-06, "loss_iou": 0.25390625, "loss_num": 0.053955078125, "loss_xval": 0.77734375, "num_input_tokens_seen": 87035060, "step": 1389 }, { "epoch": 4.625623960066556, "grad_norm": 23.20652198791504, "learning_rate": 5e-06, "loss": 0.7455, "num_input_tokens_seen": 87095904, "step": 1390 }, { "epoch": 4.625623960066556, "loss": 0.7988479733467102, "loss_ce": 0.0002639756421558559, "loss_iou": 0.283203125, "loss_num": 0.046142578125, "loss_xval": 0.796875, "num_input_tokens_seen": 87095904, "step": 1390 }, { "epoch": 4.628951747088187, "grad_norm": 17.283559799194336, "learning_rate": 5e-06, "loss": 0.6634, "num_input_tokens_seen": 87158536, "step": 1391 }, { "epoch": 4.628951747088187, "loss": 0.8823537230491638, "loss_ce": 0.0007619569660164416, "loss_iou": 0.267578125, "loss_num": 0.06884765625, "loss_xval": 0.8828125, "num_input_tokens_seen": 87158536, "step": 1391 }, { "epoch": 4.632279534109817, "grad_norm": 12.51486587524414, "learning_rate": 5e-06, "loss": 0.7721, "num_input_tokens_seen": 87221724, "step": 1392 }, { "epoch": 4.632279534109817, "loss": 0.759742021560669, "loss_ce": 9.84323924058117e-05, "loss_iou": 0.189453125, "loss_num": 0.076171875, "loss_xval": 0.7578125, "num_input_tokens_seen": 87221724, "step": 1392 }, { "epoch": 4.635607321131448, "grad_norm": 8.030847549438477, "learning_rate": 5e-06, "loss": 0.8579, "num_input_tokens_seen": 87284540, "step": 1393 }, { "epoch": 4.635607321131448, "loss": 0.8183420896530151, "loss_ce": 0.0014476042706519365, "loss_iou": 0.30078125, "loss_num": 0.043212890625, "loss_xval": 0.81640625, "num_input_tokens_seen": 87284540, "step": 1393 }, { "epoch": 4.6389351081530785, "grad_norm": 19.066869735717773, "learning_rate": 5e-06, "loss": 0.6132, "num_input_tokens_seen": 87345048, "step": 1394 }, { "epoch": 4.6389351081530785, "loss": 0.7153163552284241, "loss_ce": 0.00047258762060664594, "loss_iou": 0.23046875, "loss_num": 0.05078125, "loss_xval": 0.71484375, "num_input_tokens_seen": 87345048, "step": 1394 }, { "epoch": 4.642262895174709, "grad_norm": 7.339348316192627, "learning_rate": 5e-06, "loss": 0.647, "num_input_tokens_seen": 87408380, "step": 1395 }, { "epoch": 4.642262895174709, "loss": 0.777653694152832, "loss_ce": 0.0004319966828916222, "loss_iou": 0.2890625, "loss_num": 0.039794921875, "loss_xval": 0.77734375, "num_input_tokens_seen": 87408380, "step": 1395 }, { "epoch": 4.64559068219634, "grad_norm": 11.174449920654297, "learning_rate": 5e-06, "loss": 0.8674, "num_input_tokens_seen": 87471892, "step": 1396 }, { "epoch": 4.64559068219634, "loss": 0.796759307384491, "loss_ce": 0.0002505228912923485, "loss_iou": 0.267578125, "loss_num": 0.052001953125, "loss_xval": 0.796875, "num_input_tokens_seen": 87471892, "step": 1396 }, { "epoch": 4.64891846921797, "grad_norm": 11.844344139099121, "learning_rate": 5e-06, "loss": 0.6369, "num_input_tokens_seen": 87534040, "step": 1397 }, { "epoch": 4.64891846921797, "loss": 0.6389685869216919, "loss_ce": 0.0004188179736956954, "loss_iou": 0.21875, "loss_num": 0.04052734375, "loss_xval": 0.63671875, "num_input_tokens_seen": 87534040, "step": 1397 }, { "epoch": 4.652246256239601, "grad_norm": 15.831585884094238, "learning_rate": 5e-06, "loss": 0.9046, "num_input_tokens_seen": 87596240, "step": 1398 }, { "epoch": 4.652246256239601, "loss": 0.7054827213287354, "loss_ce": 0.0008928956813178957, "loss_iou": 0.2138671875, "loss_num": 0.0556640625, "loss_xval": 0.703125, "num_input_tokens_seen": 87596240, "step": 1398 }, { "epoch": 4.655574043261232, "grad_norm": 6.208771705627441, "learning_rate": 5e-06, "loss": 0.6544, "num_input_tokens_seen": 87657412, "step": 1399 }, { "epoch": 4.655574043261232, "loss": 0.7047736644744873, "loss_ce": 0.00042791658779606223, "loss_iou": 0.208984375, "loss_num": 0.05712890625, "loss_xval": 0.703125, "num_input_tokens_seen": 87657412, "step": 1399 }, { "epoch": 4.658901830282862, "grad_norm": 15.71760082244873, "learning_rate": 5e-06, "loss": 0.5694, "num_input_tokens_seen": 87721156, "step": 1400 }, { "epoch": 4.658901830282862, "loss": 0.4697321057319641, "loss_ce": 0.0004938616184517741, "loss_iou": 0.146484375, "loss_num": 0.03515625, "loss_xval": 0.46875, "num_input_tokens_seen": 87721156, "step": 1400 }, { "epoch": 4.662229617304493, "grad_norm": 6.701347351074219, "learning_rate": 5e-06, "loss": 0.8324, "num_input_tokens_seen": 87784928, "step": 1401 }, { "epoch": 4.662229617304493, "loss": 0.851381778717041, "loss_ce": 0.0012840689159929752, "loss_iou": 0.294921875, "loss_num": 0.052001953125, "loss_xval": 0.8515625, "num_input_tokens_seen": 87784928, "step": 1401 }, { "epoch": 4.665557404326123, "grad_norm": 9.888707160949707, "learning_rate": 5e-06, "loss": 0.6811, "num_input_tokens_seen": 87846888, "step": 1402 }, { "epoch": 4.665557404326123, "loss": 0.545545220375061, "loss_ce": 0.0025764962192624807, "loss_iou": 0.15625, "loss_num": 0.046142578125, "loss_xval": 0.54296875, "num_input_tokens_seen": 87846888, "step": 1402 }, { "epoch": 4.668885191347754, "grad_norm": 12.931994438171387, "learning_rate": 5e-06, "loss": 0.6414, "num_input_tokens_seen": 87909236, "step": 1403 }, { "epoch": 4.668885191347754, "loss": 0.5946015119552612, "loss_ce": 0.000485275435494259, "loss_iou": 0.208984375, "loss_num": 0.03515625, "loss_xval": 0.59375, "num_input_tokens_seen": 87909236, "step": 1403 }, { "epoch": 4.672212978369385, "grad_norm": 10.527796745300293, "learning_rate": 5e-06, "loss": 0.7106, "num_input_tokens_seen": 87971780, "step": 1404 }, { "epoch": 4.672212978369385, "loss": 0.6194738149642944, "loss_ce": 0.002530462807044387, "loss_iou": 0.1513671875, "loss_num": 0.06298828125, "loss_xval": 0.6171875, "num_input_tokens_seen": 87971780, "step": 1404 }, { "epoch": 4.675540765391015, "grad_norm": 7.018352031707764, "learning_rate": 5e-06, "loss": 0.8546, "num_input_tokens_seen": 88034908, "step": 1405 }, { "epoch": 4.675540765391015, "loss": 0.9526970386505127, "loss_ce": 6.0325139202177525e-05, "loss_iou": 0.298828125, "loss_num": 0.07080078125, "loss_xval": 0.953125, "num_input_tokens_seen": 88034908, "step": 1405 }, { "epoch": 4.678868552412646, "grad_norm": 7.258470058441162, "learning_rate": 5e-06, "loss": 0.5011, "num_input_tokens_seen": 88096500, "step": 1406 }, { "epoch": 4.678868552412646, "loss": 0.65594881772995, "loss_ce": 0.00018714150064624846, "loss_iou": 0.166015625, "loss_num": 0.06494140625, "loss_xval": 0.65625, "num_input_tokens_seen": 88096500, "step": 1406 }, { "epoch": 4.6821963394342765, "grad_norm": 12.497472763061523, "learning_rate": 5e-06, "loss": 0.5734, "num_input_tokens_seen": 88157424, "step": 1407 }, { "epoch": 4.6821963394342765, "loss": 0.5287968516349792, "loss_ce": 0.0008427201537415385, "loss_iou": 0.18359375, "loss_num": 0.031982421875, "loss_xval": 0.52734375, "num_input_tokens_seen": 88157424, "step": 1407 }, { "epoch": 4.685524126455907, "grad_norm": 13.06940746307373, "learning_rate": 5e-06, "loss": 0.6403, "num_input_tokens_seen": 88219076, "step": 1408 }, { "epoch": 4.685524126455907, "loss": 0.5052746534347534, "loss_ce": 2.560833490861114e-05, "loss_iou": 0.140625, "loss_num": 0.044921875, "loss_xval": 0.50390625, "num_input_tokens_seen": 88219076, "step": 1408 }, { "epoch": 4.688851913477538, "grad_norm": 8.848599433898926, "learning_rate": 5e-06, "loss": 0.5396, "num_input_tokens_seen": 88281864, "step": 1409 }, { "epoch": 4.688851913477538, "loss": 0.6765046119689941, "loss_ce": 0.00011294549040030688, "loss_iou": 0.1884765625, "loss_num": 0.059814453125, "loss_xval": 0.67578125, "num_input_tokens_seen": 88281864, "step": 1409 }, { "epoch": 4.692179700499168, "grad_norm": 8.952995300292969, "learning_rate": 5e-06, "loss": 0.4109, "num_input_tokens_seen": 88342824, "step": 1410 }, { "epoch": 4.692179700499168, "loss": 0.5194376707077026, "loss_ce": 0.0006388693582266569, "loss_iou": 0.1103515625, "loss_num": 0.0595703125, "loss_xval": 0.51953125, "num_input_tokens_seen": 88342824, "step": 1410 }, { "epoch": 4.695507487520799, "grad_norm": 11.815835952758789, "learning_rate": 5e-06, "loss": 0.7675, "num_input_tokens_seen": 88405772, "step": 1411 }, { "epoch": 4.695507487520799, "loss": 0.6887185573577881, "loss_ce": 0.00018099366570822895, "loss_iou": 0.2255859375, "loss_num": 0.04736328125, "loss_xval": 0.6875, "num_input_tokens_seen": 88405772, "step": 1411 }, { "epoch": 4.6988352745424296, "grad_norm": 8.99443531036377, "learning_rate": 5e-06, "loss": 0.6304, "num_input_tokens_seen": 88468084, "step": 1412 }, { "epoch": 4.6988352745424296, "loss": 0.8284512758255005, "loss_ce": 8.219605660997331e-05, "loss_iou": 0.244140625, "loss_num": 0.06787109375, "loss_xval": 0.828125, "num_input_tokens_seen": 88468084, "step": 1412 }, { "epoch": 4.70216306156406, "grad_norm": 12.500134468078613, "learning_rate": 5e-06, "loss": 0.808, "num_input_tokens_seen": 88528824, "step": 1413 }, { "epoch": 4.70216306156406, "loss": 0.9188534021377563, "loss_ce": 3.0147095458232798e-05, "loss_iou": 0.283203125, "loss_num": 0.0703125, "loss_xval": 0.91796875, "num_input_tokens_seen": 88528824, "step": 1413 }, { "epoch": 4.705490848585691, "grad_norm": 8.149920463562012, "learning_rate": 5e-06, "loss": 0.5456, "num_input_tokens_seen": 88591064, "step": 1414 }, { "epoch": 4.705490848585691, "loss": 0.4510113000869751, "loss_ce": 2.2540578356711194e-05, "loss_iou": 0.10595703125, "loss_num": 0.0478515625, "loss_xval": 0.451171875, "num_input_tokens_seen": 88591064, "step": 1414 }, { "epoch": 4.708818635607321, "grad_norm": 14.00053882598877, "learning_rate": 5e-06, "loss": 0.995, "num_input_tokens_seen": 88653992, "step": 1415 }, { "epoch": 4.708818635607321, "loss": 1.069594144821167, "loss_ce": 1.4102173736318946e-05, "loss_iou": 0.419921875, "loss_num": 0.04638671875, "loss_xval": 1.0703125, "num_input_tokens_seen": 88653992, "step": 1415 }, { "epoch": 4.712146422628952, "grad_norm": 15.54645824432373, "learning_rate": 5e-06, "loss": 0.6461, "num_input_tokens_seen": 88716116, "step": 1416 }, { "epoch": 4.712146422628952, "loss": 0.41860735416412354, "loss_ce": 0.0001808122469810769, "loss_iou": 0.1484375, "loss_num": 0.0244140625, "loss_xval": 0.41796875, "num_input_tokens_seen": 88716116, "step": 1416 }, { "epoch": 4.715474209650583, "grad_norm": 13.604162216186523, "learning_rate": 5e-06, "loss": 0.5831, "num_input_tokens_seen": 88778872, "step": 1417 }, { "epoch": 4.715474209650583, "loss": 0.6808904409408569, "loss_ce": 0.00022634794004261494, "loss_iou": 0.271484375, "loss_num": 0.02783203125, "loss_xval": 0.6796875, "num_input_tokens_seen": 88778872, "step": 1417 }, { "epoch": 4.718801996672213, "grad_norm": 22.623563766479492, "learning_rate": 5e-06, "loss": 0.5977, "num_input_tokens_seen": 88840616, "step": 1418 }, { "epoch": 4.718801996672213, "loss": 0.7182178497314453, "loss_ce": 0.0010547826532274485, "loss_iou": 0.2197265625, "loss_num": 0.0556640625, "loss_xval": 0.71875, "num_input_tokens_seen": 88840616, "step": 1418 }, { "epoch": 4.722129783693844, "grad_norm": 25.521087646484375, "learning_rate": 5e-06, "loss": 0.9424, "num_input_tokens_seen": 88904500, "step": 1419 }, { "epoch": 4.722129783693844, "loss": 1.0386605262756348, "loss_ce": 0.0005440718960016966, "loss_iou": 0.365234375, "loss_num": 0.0615234375, "loss_xval": 1.0390625, "num_input_tokens_seen": 88904500, "step": 1419 }, { "epoch": 4.7254575707154745, "grad_norm": 32.94253921508789, "learning_rate": 5e-06, "loss": 0.6881, "num_input_tokens_seen": 88966924, "step": 1420 }, { "epoch": 4.7254575707154745, "loss": 0.8263764381408691, "loss_ce": 0.000936914118938148, "loss_iou": 0.2490234375, "loss_num": 0.0654296875, "loss_xval": 0.82421875, "num_input_tokens_seen": 88966924, "step": 1420 }, { "epoch": 4.728785357737105, "grad_norm": 12.381750106811523, "learning_rate": 5e-06, "loss": 0.6323, "num_input_tokens_seen": 89030864, "step": 1421 }, { "epoch": 4.728785357737105, "loss": 0.665398359298706, "loss_ce": 0.00011519945110194385, "loss_iou": 0.208984375, "loss_num": 0.0498046875, "loss_xval": 0.6640625, "num_input_tokens_seen": 89030864, "step": 1421 }, { "epoch": 4.732113144758736, "grad_norm": 12.26732349395752, "learning_rate": 5e-06, "loss": 0.5234, "num_input_tokens_seen": 89092332, "step": 1422 }, { "epoch": 4.732113144758736, "loss": 0.2868744730949402, "loss_ce": 9.247070920537226e-06, "loss_iou": 0.0380859375, "loss_num": 0.042236328125, "loss_xval": 0.287109375, "num_input_tokens_seen": 89092332, "step": 1422 }, { "epoch": 4.735440931780366, "grad_norm": 7.801537990570068, "learning_rate": 5e-06, "loss": 0.8458, "num_input_tokens_seen": 89155040, "step": 1423 }, { "epoch": 4.735440931780366, "loss": 0.6569440364837646, "loss_ce": 0.00038879140629433095, "loss_iou": 0.2236328125, "loss_num": 0.041748046875, "loss_xval": 0.65625, "num_input_tokens_seen": 89155040, "step": 1423 }, { "epoch": 4.738768718801997, "grad_norm": 17.682525634765625, "learning_rate": 5e-06, "loss": 0.7594, "num_input_tokens_seen": 89217588, "step": 1424 }, { "epoch": 4.738768718801997, "loss": 0.6541336178779602, "loss_ce": 0.0006301991525106132, "loss_iou": 0.228515625, "loss_num": 0.039306640625, "loss_xval": 0.65234375, "num_input_tokens_seen": 89217588, "step": 1424 }, { "epoch": 4.7420965058236275, "grad_norm": 19.01534080505371, "learning_rate": 5e-06, "loss": 0.7232, "num_input_tokens_seen": 89281484, "step": 1425 }, { "epoch": 4.7420965058236275, "loss": 0.5709552764892578, "loss_ce": 0.0006427803309634328, "loss_iou": 0.212890625, "loss_num": 0.02880859375, "loss_xval": 0.5703125, "num_input_tokens_seen": 89281484, "step": 1425 }, { "epoch": 4.745424292845258, "grad_norm": 14.757177352905273, "learning_rate": 5e-06, "loss": 0.6833, "num_input_tokens_seen": 89344164, "step": 1426 }, { "epoch": 4.745424292845258, "loss": 0.8227342367172241, "loss_ce": 0.00010239605035167187, "loss_iou": 0.302734375, "loss_num": 0.043701171875, "loss_xval": 0.82421875, "num_input_tokens_seen": 89344164, "step": 1426 }, { "epoch": 4.748752079866889, "grad_norm": 16.073335647583008, "learning_rate": 5e-06, "loss": 0.7343, "num_input_tokens_seen": 89407480, "step": 1427 }, { "epoch": 4.748752079866889, "loss": 0.6605852842330933, "loss_ce": 6.27873232588172e-05, "loss_iou": 0.2099609375, "loss_num": 0.048095703125, "loss_xval": 0.66015625, "num_input_tokens_seen": 89407480, "step": 1427 }, { "epoch": 4.752079866888519, "grad_norm": 12.472293853759766, "learning_rate": 5e-06, "loss": 0.7983, "num_input_tokens_seen": 89470712, "step": 1428 }, { "epoch": 4.752079866888519, "loss": 0.6767415404319763, "loss_ce": 0.0004719903226941824, "loss_iou": 0.2392578125, "loss_num": 0.03955078125, "loss_xval": 0.67578125, "num_input_tokens_seen": 89470712, "step": 1428 }, { "epoch": 4.75540765391015, "grad_norm": 19.07227325439453, "learning_rate": 5e-06, "loss": 0.8962, "num_input_tokens_seen": 89533400, "step": 1429 }, { "epoch": 4.75540765391015, "loss": 1.0112426280975342, "loss_ce": 0.0002563234302215278, "loss_iou": 0.298828125, "loss_num": 0.08251953125, "loss_xval": 1.0078125, "num_input_tokens_seen": 89533400, "step": 1429 }, { "epoch": 4.758735440931781, "grad_norm": 11.964755058288574, "learning_rate": 5e-06, "loss": 0.7413, "num_input_tokens_seen": 89597008, "step": 1430 }, { "epoch": 4.758735440931781, "loss": 0.42090633511543274, "loss_ce": 7.881029887357727e-06, "loss_iou": 0.12451171875, "loss_num": 0.034423828125, "loss_xval": 0.421875, "num_input_tokens_seen": 89597008, "step": 1430 }, { "epoch": 4.762063227953411, "grad_norm": 25.646238327026367, "learning_rate": 5e-06, "loss": 0.788, "num_input_tokens_seen": 89660676, "step": 1431 }, { "epoch": 4.762063227953411, "loss": 0.9184248447418213, "loss_ce": 0.0015548146329820156, "loss_iou": 0.3671875, "loss_num": 0.036865234375, "loss_xval": 0.91796875, "num_input_tokens_seen": 89660676, "step": 1431 }, { "epoch": 4.765391014975042, "grad_norm": 13.840594291687012, "learning_rate": 5e-06, "loss": 0.7977, "num_input_tokens_seen": 89724804, "step": 1432 }, { "epoch": 4.765391014975042, "loss": 0.9030267596244812, "loss_ce": 0.00019475248700473458, "loss_iou": 0.3515625, "loss_num": 0.0400390625, "loss_xval": 0.90234375, "num_input_tokens_seen": 89724804, "step": 1432 }, { "epoch": 4.768718801996672, "grad_norm": 7.904409885406494, "learning_rate": 5e-06, "loss": 0.6688, "num_input_tokens_seen": 89787772, "step": 1433 }, { "epoch": 4.768718801996672, "loss": 0.8160985708236694, "loss_ce": 0.000180591203388758, "loss_iou": 0.294921875, "loss_num": 0.044921875, "loss_xval": 0.81640625, "num_input_tokens_seen": 89787772, "step": 1433 }, { "epoch": 4.772046589018303, "grad_norm": 24.053165435791016, "learning_rate": 5e-06, "loss": 0.871, "num_input_tokens_seen": 89851700, "step": 1434 }, { "epoch": 4.772046589018303, "loss": 0.7983778715133667, "loss_ce": 0.000526325951796025, "loss_iou": 0.306640625, "loss_num": 0.037109375, "loss_xval": 0.796875, "num_input_tokens_seen": 89851700, "step": 1434 }, { "epoch": 4.775374376039934, "grad_norm": 17.900188446044922, "learning_rate": 5e-06, "loss": 0.9376, "num_input_tokens_seen": 89914596, "step": 1435 }, { "epoch": 4.775374376039934, "loss": 1.1730791330337524, "loss_ce": 0.00022753766097594053, "loss_iou": 0.388671875, "loss_num": 0.0791015625, "loss_xval": 1.171875, "num_input_tokens_seen": 89914596, "step": 1435 }, { "epoch": 4.778702163061564, "grad_norm": 12.668126106262207, "learning_rate": 5e-06, "loss": 0.7976, "num_input_tokens_seen": 89976928, "step": 1436 }, { "epoch": 4.778702163061564, "loss": 0.8083900809288025, "loss_ce": 0.0007728736381977797, "loss_iou": 0.236328125, "loss_num": 0.06689453125, "loss_xval": 0.80859375, "num_input_tokens_seen": 89976928, "step": 1436 }, { "epoch": 4.782029950083195, "grad_norm": 13.394881248474121, "learning_rate": 5e-06, "loss": 0.7966, "num_input_tokens_seen": 90040060, "step": 1437 }, { "epoch": 4.782029950083195, "loss": 1.0120275020599365, "loss_ce": 6.466710328822955e-05, "loss_iou": 0.3203125, "loss_num": 0.07470703125, "loss_xval": 1.015625, "num_input_tokens_seen": 90040060, "step": 1437 }, { "epoch": 4.7853577371048255, "grad_norm": 11.221780776977539, "learning_rate": 5e-06, "loss": 0.7538, "num_input_tokens_seen": 90103360, "step": 1438 }, { "epoch": 4.7853577371048255, "loss": 0.7139836549758911, "loss_ce": 0.0008489080937579274, "loss_iou": 0.1953125, "loss_num": 0.064453125, "loss_xval": 0.71484375, "num_input_tokens_seen": 90103360, "step": 1438 }, { "epoch": 4.788685524126456, "grad_norm": 12.567187309265137, "learning_rate": 5e-06, "loss": 0.8503, "num_input_tokens_seen": 90167208, "step": 1439 }, { "epoch": 4.788685524126456, "loss": 0.9829078316688538, "loss_ce": 0.00024178545572794974, "loss_iou": 0.34765625, "loss_num": 0.0576171875, "loss_xval": 0.984375, "num_input_tokens_seen": 90167208, "step": 1439 }, { "epoch": 4.792013311148087, "grad_norm": 12.651561737060547, "learning_rate": 5e-06, "loss": 0.5847, "num_input_tokens_seen": 90229644, "step": 1440 }, { "epoch": 4.792013311148087, "loss": 0.4671025276184082, "loss_ce": 0.0011601548176258802, "loss_iou": 0.1279296875, "loss_num": 0.0419921875, "loss_xval": 0.466796875, "num_input_tokens_seen": 90229644, "step": 1440 }, { "epoch": 4.795341098169717, "grad_norm": 16.65874671936035, "learning_rate": 5e-06, "loss": 0.7644, "num_input_tokens_seen": 90291568, "step": 1441 }, { "epoch": 4.795341098169717, "loss": 0.5984110236167908, "loss_ce": 0.0002665033971425146, "loss_iou": 0.2060546875, "loss_num": 0.037109375, "loss_xval": 0.59765625, "num_input_tokens_seen": 90291568, "step": 1441 }, { "epoch": 4.798668885191348, "grad_norm": 25.318418502807617, "learning_rate": 5e-06, "loss": 0.6216, "num_input_tokens_seen": 90355464, "step": 1442 }, { "epoch": 4.798668885191348, "loss": 0.681909441947937, "loss_ce": 8.57599443406798e-05, "loss_iou": 0.2412109375, "loss_num": 0.039794921875, "loss_xval": 0.68359375, "num_input_tokens_seen": 90355464, "step": 1442 }, { "epoch": 4.8019966722129785, "grad_norm": 9.664568901062012, "learning_rate": 5e-06, "loss": 0.6261, "num_input_tokens_seen": 90418132, "step": 1443 }, { "epoch": 4.8019966722129785, "loss": 0.6628125309944153, "loss_ce": 0.001679704524576664, "loss_iou": 0.20703125, "loss_num": 0.049560546875, "loss_xval": 0.66015625, "num_input_tokens_seen": 90418132, "step": 1443 }, { "epoch": 4.805324459234609, "grad_norm": 6.967924118041992, "learning_rate": 5e-06, "loss": 0.5027, "num_input_tokens_seen": 90481356, "step": 1444 }, { "epoch": 4.805324459234609, "loss": 0.3537171483039856, "loss_ce": 7.945661491248757e-05, "loss_iou": 0.10693359375, "loss_num": 0.0279541015625, "loss_xval": 0.353515625, "num_input_tokens_seen": 90481356, "step": 1444 }, { "epoch": 4.80865224625624, "grad_norm": 24.378341674804688, "learning_rate": 5e-06, "loss": 0.7154, "num_input_tokens_seen": 90544404, "step": 1445 }, { "epoch": 4.80865224625624, "loss": 0.7716976404190063, "loss_ce": 0.00045743631199002266, "loss_iou": 0.28125, "loss_num": 0.041748046875, "loss_xval": 0.76953125, "num_input_tokens_seen": 90544404, "step": 1445 }, { "epoch": 4.81198003327787, "grad_norm": 33.42325973510742, "learning_rate": 5e-06, "loss": 0.7386, "num_input_tokens_seen": 90607728, "step": 1446 }, { "epoch": 4.81198003327787, "loss": 0.9378975033760071, "loss_ce": 3.1257190130418167e-05, "loss_iou": 0.3125, "loss_num": 0.0625, "loss_xval": 0.9375, "num_input_tokens_seen": 90607728, "step": 1446 }, { "epoch": 4.815307820299501, "grad_norm": 10.282571792602539, "learning_rate": 5e-06, "loss": 0.6003, "num_input_tokens_seen": 90670312, "step": 1447 }, { "epoch": 4.815307820299501, "loss": 0.44258010387420654, "loss_ce": 7.520718645537272e-05, "loss_iou": 0.126953125, "loss_num": 0.03759765625, "loss_xval": 0.443359375, "num_input_tokens_seen": 90670312, "step": 1447 }, { "epoch": 4.818635607321132, "grad_norm": 9.647857666015625, "learning_rate": 5e-06, "loss": 0.8134, "num_input_tokens_seen": 90734576, "step": 1448 }, { "epoch": 4.818635607321132, "loss": 0.7873010635375977, "loss_ce": 0.0010461233323439956, "loss_iou": 0.287109375, "loss_num": 0.04248046875, "loss_xval": 0.78515625, "num_input_tokens_seen": 90734576, "step": 1448 }, { "epoch": 4.821963394342762, "grad_norm": 16.907331466674805, "learning_rate": 5e-06, "loss": 0.669, "num_input_tokens_seen": 90797412, "step": 1449 }, { "epoch": 4.821963394342762, "loss": 0.623419463634491, "loss_ce": 6.367723472067155e-06, "loss_iou": 0.23828125, "loss_num": 0.0294189453125, "loss_xval": 0.625, "num_input_tokens_seen": 90797412, "step": 1449 }, { "epoch": 4.825291181364393, "grad_norm": 24.30735969543457, "learning_rate": 5e-06, "loss": 0.756, "num_input_tokens_seen": 90858524, "step": 1450 }, { "epoch": 4.825291181364393, "loss": 0.6738710999488831, "loss_ce": 0.0005312650464475155, "loss_iou": 0.2138671875, "loss_num": 0.049072265625, "loss_xval": 0.671875, "num_input_tokens_seen": 90858524, "step": 1450 }, { "epoch": 4.8286189683860234, "grad_norm": 11.708526611328125, "learning_rate": 5e-06, "loss": 0.5307, "num_input_tokens_seen": 90921152, "step": 1451 }, { "epoch": 4.8286189683860234, "loss": 0.584744393825531, "loss_ce": 0.000760018068831414, "loss_iou": 0.16796875, "loss_num": 0.0498046875, "loss_xval": 0.5859375, "num_input_tokens_seen": 90921152, "step": 1451 }, { "epoch": 4.831946755407654, "grad_norm": 22.66852378845215, "learning_rate": 5e-06, "loss": 0.806, "num_input_tokens_seen": 90984628, "step": 1452 }, { "epoch": 4.831946755407654, "loss": 0.6450674533843994, "loss_ce": 0.0005361848743632436, "loss_iou": 0.2412109375, "loss_num": 0.032470703125, "loss_xval": 0.64453125, "num_input_tokens_seen": 90984628, "step": 1452 }, { "epoch": 4.835274542429285, "grad_norm": 34.99443817138672, "learning_rate": 5e-06, "loss": 0.5531, "num_input_tokens_seen": 91045592, "step": 1453 }, { "epoch": 4.835274542429285, "loss": 0.597947895526886, "loss_ce": 4.751010055770166e-05, "loss_iou": 0.228515625, "loss_num": 0.0281982421875, "loss_xval": 0.59765625, "num_input_tokens_seen": 91045592, "step": 1453 }, { "epoch": 4.838602329450915, "grad_norm": 31.513866424560547, "learning_rate": 5e-06, "loss": 0.7581, "num_input_tokens_seen": 91108276, "step": 1454 }, { "epoch": 4.838602329450915, "loss": 0.5201447010040283, "loss_ce": 3.078287591051776e-06, "loss_iou": 0.1455078125, "loss_num": 0.0458984375, "loss_xval": 0.51953125, "num_input_tokens_seen": 91108276, "step": 1454 }, { "epoch": 4.841930116472546, "grad_norm": 26.065690994262695, "learning_rate": 5e-06, "loss": 0.6533, "num_input_tokens_seen": 91171596, "step": 1455 }, { "epoch": 4.841930116472546, "loss": 0.7839912176132202, "loss_ce": 0.0002998454438056797, "loss_iou": 0.2734375, "loss_num": 0.04736328125, "loss_xval": 0.78515625, "num_input_tokens_seen": 91171596, "step": 1455 }, { "epoch": 4.8452579034941765, "grad_norm": 9.963427543640137, "learning_rate": 5e-06, "loss": 0.9933, "num_input_tokens_seen": 91234328, "step": 1456 }, { "epoch": 4.8452579034941765, "loss": 1.1222238540649414, "loss_ce": 0.0006418825942091644, "loss_iou": 0.384765625, "loss_num": 0.0703125, "loss_xval": 1.125, "num_input_tokens_seen": 91234328, "step": 1456 }, { "epoch": 4.848585690515807, "grad_norm": 17.40574836730957, "learning_rate": 5e-06, "loss": 0.7954, "num_input_tokens_seen": 91298288, "step": 1457 }, { "epoch": 4.848585690515807, "loss": 0.5493208765983582, "loss_ce": 4.492412699619308e-06, "loss_iou": 0.21875, "loss_num": 0.022216796875, "loss_xval": 0.55078125, "num_input_tokens_seen": 91298288, "step": 1457 }, { "epoch": 4.851913477537438, "grad_norm": 36.004634857177734, "learning_rate": 5e-06, "loss": 0.507, "num_input_tokens_seen": 91360532, "step": 1458 }, { "epoch": 4.851913477537438, "loss": 0.29166799783706665, "loss_ce": 0.0007134195184335113, "loss_iou": 0.0390625, "loss_num": 0.04248046875, "loss_xval": 0.291015625, "num_input_tokens_seen": 91360532, "step": 1458 }, { "epoch": 4.855241264559068, "grad_norm": 24.00579071044922, "learning_rate": 5e-06, "loss": 0.6614, "num_input_tokens_seen": 91423224, "step": 1459 }, { "epoch": 4.855241264559068, "loss": 0.8000708222389221, "loss_ce": 0.0012427503243088722, "loss_iou": 0.2470703125, "loss_num": 0.061279296875, "loss_xval": 0.796875, "num_input_tokens_seen": 91423224, "step": 1459 }, { "epoch": 4.858569051580699, "grad_norm": 7.054749011993408, "learning_rate": 5e-06, "loss": 0.7916, "num_input_tokens_seen": 91486976, "step": 1460 }, { "epoch": 4.858569051580699, "loss": 0.6811645030975342, "loss_ce": 0.00037837924901396036, "loss_iou": 0.2197265625, "loss_num": 0.048095703125, "loss_xval": 0.6796875, "num_input_tokens_seen": 91486976, "step": 1460 }, { "epoch": 4.86189683860233, "grad_norm": 15.921218872070312, "learning_rate": 5e-06, "loss": 0.8073, "num_input_tokens_seen": 91549948, "step": 1461 }, { "epoch": 4.86189683860233, "loss": 1.1966166496276855, "loss_ce": 0.0005717898602597415, "loss_iou": 0.41015625, "loss_num": 0.07568359375, "loss_xval": 1.1953125, "num_input_tokens_seen": 91549948, "step": 1461 }, { "epoch": 4.86522462562396, "grad_norm": 15.619834899902344, "learning_rate": 5e-06, "loss": 0.7384, "num_input_tokens_seen": 91612528, "step": 1462 }, { "epoch": 4.86522462562396, "loss": 0.8267630338668823, "loss_ce": 0.0005911277839913964, "loss_iou": 0.28515625, "loss_num": 0.051513671875, "loss_xval": 0.828125, "num_input_tokens_seen": 91612528, "step": 1462 }, { "epoch": 4.868552412645591, "grad_norm": 10.34393310546875, "learning_rate": 5e-06, "loss": 0.7152, "num_input_tokens_seen": 91674264, "step": 1463 }, { "epoch": 4.868552412645591, "loss": 0.6649792790412903, "loss_ce": 0.0007947350386530161, "loss_iou": 0.2333984375, "loss_num": 0.039306640625, "loss_xval": 0.6640625, "num_input_tokens_seen": 91674264, "step": 1463 }, { "epoch": 4.871880199667221, "grad_norm": 8.09216022491455, "learning_rate": 5e-06, "loss": 0.471, "num_input_tokens_seen": 91737116, "step": 1464 }, { "epoch": 4.871880199667221, "loss": 0.45063310861587524, "loss_ce": 0.00019367330241948366, "loss_iou": 0.1611328125, "loss_num": 0.025634765625, "loss_xval": 0.451171875, "num_input_tokens_seen": 91737116, "step": 1464 }, { "epoch": 4.875207986688852, "grad_norm": 17.690427780151367, "learning_rate": 5e-06, "loss": 0.7025, "num_input_tokens_seen": 91799080, "step": 1465 }, { "epoch": 4.875207986688852, "loss": 0.6187844276428223, "loss_ce": 1.0004376235883683e-05, "loss_iou": 0.2236328125, "loss_num": 0.034423828125, "loss_xval": 0.6171875, "num_input_tokens_seen": 91799080, "step": 1465 }, { "epoch": 4.878535773710483, "grad_norm": 19.25313377380371, "learning_rate": 5e-06, "loss": 0.6483, "num_input_tokens_seen": 91860988, "step": 1466 }, { "epoch": 4.878535773710483, "loss": 0.559953510761261, "loss_ce": 0.00020009189029224217, "loss_iou": 0.15234375, "loss_num": 0.051025390625, "loss_xval": 0.55859375, "num_input_tokens_seen": 91860988, "step": 1466 }, { "epoch": 4.881863560732113, "grad_norm": 10.655545234680176, "learning_rate": 5e-06, "loss": 0.7747, "num_input_tokens_seen": 91924968, "step": 1467 }, { "epoch": 4.881863560732113, "loss": 0.636091947555542, "loss_ce": 0.00034974643494933844, "loss_iou": 0.26171875, "loss_num": 0.0225830078125, "loss_xval": 0.63671875, "num_input_tokens_seen": 91924968, "step": 1467 }, { "epoch": 4.885191347753744, "grad_norm": 20.099782943725586, "learning_rate": 5e-06, "loss": 0.7424, "num_input_tokens_seen": 91988252, "step": 1468 }, { "epoch": 4.885191347753744, "loss": 0.5978015661239624, "loss_ce": 0.0008777122711762786, "loss_iou": 0.2265625, "loss_num": 0.028564453125, "loss_xval": 0.59765625, "num_input_tokens_seen": 91988252, "step": 1468 }, { "epoch": 4.8885191347753745, "grad_norm": 17.030847549438477, "learning_rate": 5e-06, "loss": 0.6069, "num_input_tokens_seen": 92049740, "step": 1469 }, { "epoch": 4.8885191347753745, "loss": 0.7965136766433716, "loss_ce": 4.913673365081195e-06, "loss_iou": 0.24609375, "loss_num": 0.060791015625, "loss_xval": 0.796875, "num_input_tokens_seen": 92049740, "step": 1469 }, { "epoch": 4.891846921797005, "grad_norm": 17.971389770507812, "learning_rate": 5e-06, "loss": 0.7334, "num_input_tokens_seen": 92112536, "step": 1470 }, { "epoch": 4.891846921797005, "loss": 0.6343286037445068, "loss_ce": 0.0005395347252488136, "loss_iou": 0.1923828125, "loss_num": 0.050048828125, "loss_xval": 0.6328125, "num_input_tokens_seen": 92112536, "step": 1470 }, { "epoch": 4.895174708818636, "grad_norm": 23.792451858520508, "learning_rate": 5e-06, "loss": 0.8228, "num_input_tokens_seen": 92176628, "step": 1471 }, { "epoch": 4.895174708818636, "loss": 0.6928781270980835, "loss_ce": 7.047529834380839e-06, "loss_iou": 0.263671875, "loss_num": 0.033447265625, "loss_xval": 0.69140625, "num_input_tokens_seen": 92176628, "step": 1471 }, { "epoch": 4.898502495840266, "grad_norm": 10.210338592529297, "learning_rate": 5e-06, "loss": 0.5654, "num_input_tokens_seen": 92238496, "step": 1472 }, { "epoch": 4.898502495840266, "loss": 0.5062616467475891, "loss_ce": 0.0018671302823349833, "loss_iou": 0.16796875, "loss_num": 0.033935546875, "loss_xval": 0.50390625, "num_input_tokens_seen": 92238496, "step": 1472 }, { "epoch": 4.901830282861897, "grad_norm": 27.42046546936035, "learning_rate": 5e-06, "loss": 0.7748, "num_input_tokens_seen": 92301304, "step": 1473 }, { "epoch": 4.901830282861897, "loss": 0.6527865529060364, "loss_ce": 0.00019865072681568563, "loss_iou": 0.2099609375, "loss_num": 0.046630859375, "loss_xval": 0.65234375, "num_input_tokens_seen": 92301304, "step": 1473 }, { "epoch": 4.9051580698835275, "grad_norm": 24.284852981567383, "learning_rate": 5e-06, "loss": 0.6917, "num_input_tokens_seen": 92363496, "step": 1474 }, { "epoch": 4.9051580698835275, "loss": 0.5194106101989746, "loss_ce": 0.00018454447854310274, "loss_iou": 0.158203125, "loss_num": 0.040283203125, "loss_xval": 0.51953125, "num_input_tokens_seen": 92363496, "step": 1474 }, { "epoch": 4.908485856905158, "grad_norm": 9.942500114440918, "learning_rate": 5e-06, "loss": 0.5869, "num_input_tokens_seen": 92426528, "step": 1475 }, { "epoch": 4.908485856905158, "loss": 0.43600332736968994, "loss_ce": 0.0009447159245610237, "loss_iou": 0.0732421875, "loss_num": 0.0576171875, "loss_xval": 0.435546875, "num_input_tokens_seen": 92426528, "step": 1475 }, { "epoch": 4.911813643926789, "grad_norm": 12.120987892150879, "learning_rate": 5e-06, "loss": 0.5949, "num_input_tokens_seen": 92488064, "step": 1476 }, { "epoch": 4.911813643926789, "loss": 0.6362853050231934, "loss_ce": 0.0012755857314914465, "loss_iou": 0.2216796875, "loss_num": 0.0380859375, "loss_xval": 0.63671875, "num_input_tokens_seen": 92488064, "step": 1476 }, { "epoch": 4.915141430948419, "grad_norm": 21.779373168945312, "learning_rate": 5e-06, "loss": 0.5642, "num_input_tokens_seen": 92550804, "step": 1477 }, { "epoch": 4.915141430948419, "loss": 0.5219756960868835, "loss_ce": 3.0526402952091303e-06, "loss_iou": 0.2099609375, "loss_num": 0.0205078125, "loss_xval": 0.5234375, "num_input_tokens_seen": 92550804, "step": 1477 }, { "epoch": 4.91846921797005, "grad_norm": 41.83907699584961, "learning_rate": 5e-06, "loss": 0.8394, "num_input_tokens_seen": 92614052, "step": 1478 }, { "epoch": 4.91846921797005, "loss": 0.7585424780845642, "loss_ce": 0.0014623773749917746, "loss_iou": 0.306640625, "loss_num": 0.0286865234375, "loss_xval": 0.7578125, "num_input_tokens_seen": 92614052, "step": 1478 }, { "epoch": 4.921797004991681, "grad_norm": 23.484262466430664, "learning_rate": 5e-06, "loss": 0.7083, "num_input_tokens_seen": 92676992, "step": 1479 }, { "epoch": 4.921797004991681, "loss": 0.45134830474853516, "loss_ce": 0.000298495520837605, "loss_iou": 0.15625, "loss_num": 0.0277099609375, "loss_xval": 0.451171875, "num_input_tokens_seen": 92676992, "step": 1479 }, { "epoch": 4.925124792013311, "grad_norm": 16.01560401916504, "learning_rate": 5e-06, "loss": 0.7104, "num_input_tokens_seen": 92739476, "step": 1480 }, { "epoch": 4.925124792013311, "loss": 0.7750359773635864, "loss_ce": 1.1565551176317967e-05, "loss_iou": 0.2890625, "loss_num": 0.039794921875, "loss_xval": 0.7734375, "num_input_tokens_seen": 92739476, "step": 1480 }, { "epoch": 4.928452579034942, "grad_norm": 14.046684265136719, "learning_rate": 5e-06, "loss": 0.5505, "num_input_tokens_seen": 92801844, "step": 1481 }, { "epoch": 4.928452579034942, "loss": 0.6174169778823853, "loss_ce": 0.000961879501119256, "loss_iou": 0.1796875, "loss_num": 0.051513671875, "loss_xval": 0.6171875, "num_input_tokens_seen": 92801844, "step": 1481 }, { "epoch": 4.931780366056572, "grad_norm": 24.918357849121094, "learning_rate": 5e-06, "loss": 0.6422, "num_input_tokens_seen": 92864440, "step": 1482 }, { "epoch": 4.931780366056572, "loss": 0.5476626753807068, "loss_ce": 0.003900481853634119, "loss_iou": 0.1337890625, "loss_num": 0.055419921875, "loss_xval": 0.54296875, "num_input_tokens_seen": 92864440, "step": 1482 }, { "epoch": 4.935108153078203, "grad_norm": 12.272085189819336, "learning_rate": 5e-06, "loss": 0.5356, "num_input_tokens_seen": 92926160, "step": 1483 }, { "epoch": 4.935108153078203, "loss": 0.41046756505966187, "loss_ce": 0.0007995814085006714, "loss_iou": 0.1298828125, "loss_num": 0.0299072265625, "loss_xval": 0.41015625, "num_input_tokens_seen": 92926160, "step": 1483 }, { "epoch": 4.938435940099834, "grad_norm": 14.432737350463867, "learning_rate": 5e-06, "loss": 0.8823, "num_input_tokens_seen": 92990268, "step": 1484 }, { "epoch": 4.938435940099834, "loss": 0.904925525188446, "loss_ce": 0.0006286612479016185, "loss_iou": 0.388671875, "loss_num": 0.02587890625, "loss_xval": 0.90625, "num_input_tokens_seen": 92990268, "step": 1484 }, { "epoch": 4.941763727121464, "grad_norm": 28.021799087524414, "learning_rate": 5e-06, "loss": 0.6162, "num_input_tokens_seen": 93053936, "step": 1485 }, { "epoch": 4.941763727121464, "loss": 0.40271496772766113, "loss_ce": 5.004140803066548e-06, "loss_iou": 0.1552734375, "loss_num": 0.0186767578125, "loss_xval": 0.40234375, "num_input_tokens_seen": 93053936, "step": 1485 }, { "epoch": 4.945091514143095, "grad_norm": 36.70539093017578, "learning_rate": 5e-06, "loss": 0.7026, "num_input_tokens_seen": 93117408, "step": 1486 }, { "epoch": 4.945091514143095, "loss": 0.7068368196487427, "loss_ce": 0.0009041887824423611, "loss_iou": 0.21484375, "loss_num": 0.05517578125, "loss_xval": 0.70703125, "num_input_tokens_seen": 93117408, "step": 1486 }, { "epoch": 4.9484193011647255, "grad_norm": 34.976402282714844, "learning_rate": 5e-06, "loss": 0.6626, "num_input_tokens_seen": 93179780, "step": 1487 }, { "epoch": 4.9484193011647255, "loss": 0.5312550067901611, "loss_ce": 5.038962626713328e-06, "loss_iou": 0.1689453125, "loss_num": 0.03857421875, "loss_xval": 0.53125, "num_input_tokens_seen": 93179780, "step": 1487 }, { "epoch": 4.951747088186356, "grad_norm": 14.874862670898438, "learning_rate": 5e-06, "loss": 0.583, "num_input_tokens_seen": 93242380, "step": 1488 }, { "epoch": 4.951747088186356, "loss": 0.5796291828155518, "loss_ce": 3.933107655029744e-05, "loss_iou": 0.1943359375, "loss_num": 0.0380859375, "loss_xval": 0.578125, "num_input_tokens_seen": 93242380, "step": 1488 }, { "epoch": 4.955074875207987, "grad_norm": 14.622214317321777, "learning_rate": 5e-06, "loss": 0.5826, "num_input_tokens_seen": 93305320, "step": 1489 }, { "epoch": 4.955074875207987, "loss": 0.746558666229248, "loss_ce": 3.767916496144608e-05, "loss_iou": 0.25390625, "loss_num": 0.0478515625, "loss_xval": 0.74609375, "num_input_tokens_seen": 93305320, "step": 1489 }, { "epoch": 4.958402662229617, "grad_norm": 20.690074920654297, "learning_rate": 5e-06, "loss": 0.8156, "num_input_tokens_seen": 93367680, "step": 1490 }, { "epoch": 4.958402662229617, "loss": 0.8623336553573608, "loss_ce": 0.0005172833916731179, "loss_iou": 0.28125, "loss_num": 0.060302734375, "loss_xval": 0.86328125, "num_input_tokens_seen": 93367680, "step": 1490 }, { "epoch": 4.961730449251248, "grad_norm": 19.866193771362305, "learning_rate": 5e-06, "loss": 0.6329, "num_input_tokens_seen": 93429344, "step": 1491 }, { "epoch": 4.961730449251248, "loss": 0.5215956568717957, "loss_ce": 0.00011129368795081973, "loss_iou": 0.15234375, "loss_num": 0.04345703125, "loss_xval": 0.5234375, "num_input_tokens_seen": 93429344, "step": 1491 }, { "epoch": 4.965058236272879, "grad_norm": 14.48011589050293, "learning_rate": 5e-06, "loss": 0.9734, "num_input_tokens_seen": 93493372, "step": 1492 }, { "epoch": 4.965058236272879, "loss": 0.8466031551361084, "loss_ce": 0.0009000533609651029, "loss_iou": 0.275390625, "loss_num": 0.058837890625, "loss_xval": 0.84375, "num_input_tokens_seen": 93493372, "step": 1492 }, { "epoch": 4.968386023294509, "grad_norm": 12.389137268066406, "learning_rate": 5e-06, "loss": 0.8139, "num_input_tokens_seen": 93556592, "step": 1493 }, { "epoch": 4.968386023294509, "loss": 0.672704815864563, "loss_ce": 0.0007077268091961741, "loss_iou": 0.21875, "loss_num": 0.046875, "loss_xval": 0.671875, "num_input_tokens_seen": 93556592, "step": 1493 }, { "epoch": 4.97171381031614, "grad_norm": 14.919951438903809, "learning_rate": 5e-06, "loss": 0.6283, "num_input_tokens_seen": 93619256, "step": 1494 }, { "epoch": 4.97171381031614, "loss": 0.4168055057525635, "loss_ce": 0.00017951334302779287, "loss_iou": 0.1123046875, "loss_num": 0.038330078125, "loss_xval": 0.416015625, "num_input_tokens_seen": 93619256, "step": 1494 }, { "epoch": 4.97504159733777, "grad_norm": 34.406837463378906, "learning_rate": 5e-06, "loss": 0.7431, "num_input_tokens_seen": 93681420, "step": 1495 }, { "epoch": 4.97504159733777, "loss": 0.7975531220436096, "loss_ce": 0.0037908926606178284, "loss_iou": 0.279296875, "loss_num": 0.046875, "loss_xval": 0.79296875, "num_input_tokens_seen": 93681420, "step": 1495 }, { "epoch": 4.978369384359401, "grad_norm": 16.960229873657227, "learning_rate": 5e-06, "loss": 0.8396, "num_input_tokens_seen": 93744272, "step": 1496 }, { "epoch": 4.978369384359401, "loss": 1.021630048751831, "loss_ce": 0.00038986955769360065, "loss_iou": 0.3984375, "loss_num": 0.044677734375, "loss_xval": 1.0234375, "num_input_tokens_seen": 93744272, "step": 1496 }, { "epoch": 4.981697171381032, "grad_norm": 12.859408378601074, "learning_rate": 5e-06, "loss": 0.7763, "num_input_tokens_seen": 93806256, "step": 1497 }, { "epoch": 4.981697171381032, "loss": 0.5506644248962402, "loss_ce": 0.001958362990990281, "loss_iou": 0.1806640625, "loss_num": 0.037841796875, "loss_xval": 0.546875, "num_input_tokens_seen": 93806256, "step": 1497 }, { "epoch": 4.985024958402662, "grad_norm": 31.596189498901367, "learning_rate": 5e-06, "loss": 0.69, "num_input_tokens_seen": 93869616, "step": 1498 }, { "epoch": 4.985024958402662, "loss": 0.746276319026947, "loss_ce": 0.00018256355542689562, "loss_iou": 0.267578125, "loss_num": 0.042236328125, "loss_xval": 0.74609375, "num_input_tokens_seen": 93869616, "step": 1498 }, { "epoch": 4.988352745424293, "grad_norm": 24.65460777282715, "learning_rate": 5e-06, "loss": 0.7051, "num_input_tokens_seen": 93931152, "step": 1499 }, { "epoch": 4.988352745424293, "loss": 0.7398415207862854, "loss_ce": 0.00033956102561205626, "loss_iou": 0.26953125, "loss_num": 0.0400390625, "loss_xval": 0.73828125, "num_input_tokens_seen": 93931152, "step": 1499 }, { "epoch": 4.9916805324459235, "grad_norm": 10.513225555419922, "learning_rate": 5e-06, "loss": 0.5617, "num_input_tokens_seen": 93994508, "step": 1500 }, { "epoch": 4.9916805324459235, "eval_seeclick_CIoU": 0.12539703398942947, "eval_seeclick_GIoU": 0.1438598707318306, "eval_seeclick_IoU": 0.22447702288627625, "eval_seeclick_MAE_all": 0.1706928089261055, "eval_seeclick_MAE_h": 0.05414240434765816, "eval_seeclick_MAE_w": 0.1219484768807888, "eval_seeclick_MAE_x_boxes": 0.24619252979755402, "eval_seeclick_MAE_y_boxes": 0.1315431222319603, "eval_seeclick_NUM_probability": 0.9974748492240906, "eval_seeclick_inside_bbox": 0.30520834028720856, "eval_seeclick_loss": 2.665666341781616, "eval_seeclick_loss_ce": 0.10174492001533508, "eval_seeclick_loss_iou": 0.855712890625, "eval_seeclick_loss_num": 0.16909027099609375, "eval_seeclick_loss_xval": 2.556640625, "eval_seeclick_runtime": 65.5562, "eval_seeclick_samples_per_second": 0.717, "eval_seeclick_steps_per_second": 0.031, "num_input_tokens_seen": 93994508, "step": 1500 }, { "epoch": 4.9916805324459235, "eval_icons_CIoU": 0.04876134404912591, "eval_icons_GIoU": 0.15376271307468414, "eval_icons_IoU": 0.20101643353700638, "eval_icons_MAE_all": 0.13954433798789978, "eval_icons_MAE_h": 0.061773356050252914, "eval_icons_MAE_w": 0.15250807255506516, "eval_icons_MAE_x_boxes": 0.1526419073343277, "eval_icons_MAE_y_boxes": 0.03896789811551571, "eval_icons_NUM_probability": 0.9999941885471344, "eval_icons_inside_bbox": 0.3263888955116272, "eval_icons_loss": 2.3821887969970703, "eval_icons_loss_ce": 1.977284341592167e-06, "eval_icons_loss_iou": 0.833984375, "eval_icons_loss_num": 0.14191246032714844, "eval_icons_loss_xval": 2.3779296875, "eval_icons_runtime": 76.6569, "eval_icons_samples_per_second": 0.652, "eval_icons_steps_per_second": 0.026, "num_input_tokens_seen": 93994508, "step": 1500 }, { "epoch": 4.9916805324459235, "eval_screenspot_CIoU": 0.05785238494475683, "eval_screenspot_GIoU": 0.11830167099833488, "eval_screenspot_IoU": 0.20486934979756674, "eval_screenspot_MAE_all": 0.1998411019643148, "eval_screenspot_MAE_h": 0.07882813364267349, "eval_screenspot_MAE_w": 0.1679569110274315, "eval_screenspot_MAE_x_boxes": 0.26309264699618023, "eval_screenspot_MAE_y_boxes": 0.14158939321835837, "eval_screenspot_NUM_probability": 0.999947190284729, "eval_screenspot_inside_bbox": 0.34166666865348816, "eval_screenspot_loss": 2.7864136695861816, "eval_screenspot_loss_ce": 7.061872323295877e-05, "eval_screenspot_loss_iou": 0.8956705729166666, "eval_screenspot_loss_num": 0.204803466796875, "eval_screenspot_loss_xval": 2.8151041666666665, "eval_screenspot_runtime": 118.0213, "eval_screenspot_samples_per_second": 0.754, "eval_screenspot_steps_per_second": 0.025, "num_input_tokens_seen": 93994508, "step": 1500 }, { "epoch": 4.9916805324459235, "eval_compot_CIoU": -0.00016517378389835358, "eval_compot_GIoU": 0.10418009385466576, "eval_compot_IoU": 0.16831143200397491, "eval_compot_MAE_all": 0.18598458915948868, "eval_compot_MAE_h": 0.07219501584768295, "eval_compot_MAE_w": 0.24491457641124725, "eval_compot_MAE_x_boxes": 0.182175874710083, "eval_compot_MAE_y_boxes": 0.06786558777093887, "eval_compot_NUM_probability": 0.9999881684780121, "eval_compot_inside_bbox": 0.2986111119389534, "eval_compot_loss": 2.7441301345825195, "eval_compot_loss_ce": 0.004642534884624183, "eval_compot_loss_iou": 0.90576171875, "eval_compot_loss_num": 0.1902923583984375, "eval_compot_loss_xval": 2.7626953125, "eval_compot_runtime": 67.269, "eval_compot_samples_per_second": 0.743, "eval_compot_steps_per_second": 0.03, "num_input_tokens_seen": 93994508, "step": 1500 }, { "epoch": 4.9916805324459235, "eval_custom_ui_MAE_all": 0.07861483469605446, "eval_custom_ui_MAE_x": 0.07045421935617924, "eval_custom_ui_MAE_y": 0.08677546679973602, "eval_custom_ui_NUM_probability": 0.9999936819076538, "eval_custom_ui_loss": 0.37946105003356934, "eval_custom_ui_loss_ce": 9.375693934998708e-06, "eval_custom_ui_loss_num": 0.0741729736328125, "eval_custom_ui_loss_xval": 0.37078857421875, "eval_custom_ui_runtime": 51.6085, "eval_custom_ui_samples_per_second": 0.969, "eval_custom_ui_steps_per_second": 0.039, "num_input_tokens_seen": 93994508, "step": 1500 }, { "epoch": 4.9916805324459235, "loss": 0.3743995428085327, "loss_ce": 9.905661499942653e-06, "loss_iou": 0.0, "loss_num": 0.07470703125, "loss_xval": 0.375, "num_input_tokens_seen": 93994508, "step": 1500 }, { "epoch": 4.995008319467554, "grad_norm": 24.133996963500977, "learning_rate": 5e-06, "loss": 0.5173, "num_input_tokens_seen": 94056528, "step": 1501 }, { "epoch": 4.995008319467554, "loss": 0.4280321002006531, "loss_ce": 5.3581257816404104e-05, "loss_iou": 0.1318359375, "loss_num": 0.03271484375, "loss_xval": 0.427734375, "num_input_tokens_seen": 94056528, "step": 1501 }, { "epoch": 4.998336106489185, "grad_norm": 13.59617805480957, "learning_rate": 5e-06, "loss": 0.7287, "num_input_tokens_seen": 94120456, "step": 1502 }, { "epoch": 4.998336106489185, "loss": 0.8299544453620911, "loss_ce": 0.0006087642977945507, "loss_iou": 0.2734375, "loss_num": 0.05615234375, "loss_xval": 0.828125, "num_input_tokens_seen": 94120456, "step": 1502 }, { "epoch": 4.998336106489185, "loss": 0.8235313296318054, "loss_ce": 4.499550414038822e-05, "loss_iou": 0.27734375, "loss_num": 0.05419921875, "loss_xval": 0.82421875, "num_input_tokens_seen": 94152608, "step": 1502 }, { "epoch": 5.001663893510815, "grad_norm": 14.112279891967773, "learning_rate": 5e-06, "loss": 0.7359, "num_input_tokens_seen": 94184688, "step": 1503 }, { "epoch": 5.001663893510815, "loss": 0.6481797695159912, "loss_ce": 0.0002305309899384156, "loss_iou": 0.251953125, "loss_num": 0.028564453125, "loss_xval": 0.6484375, "num_input_tokens_seen": 94184688, "step": 1503 }, { "epoch": 5.004991680532446, "grad_norm": 26.775850296020508, "learning_rate": 5e-06, "loss": 0.8273, "num_input_tokens_seen": 94250036, "step": 1504 }, { "epoch": 5.004991680532446, "loss": 0.9145175814628601, "loss_ce": 0.0004551278834696859, "loss_iou": 0.3515625, "loss_num": 0.042236328125, "loss_xval": 0.9140625, "num_input_tokens_seen": 94250036, "step": 1504 }, { "epoch": 5.0083194675540765, "grad_norm": 11.916851997375488, "learning_rate": 5e-06, "loss": 0.6772, "num_input_tokens_seen": 94313792, "step": 1505 }, { "epoch": 5.0083194675540765, "loss": 0.7098668217658997, "loss_ce": 0.0008824262768030167, "loss_iou": 0.255859375, "loss_num": 0.03955078125, "loss_xval": 0.7109375, "num_input_tokens_seen": 94313792, "step": 1505 }, { "epoch": 5.011647254575707, "grad_norm": 8.042437553405762, "learning_rate": 5e-06, "loss": 0.7836, "num_input_tokens_seen": 94375952, "step": 1506 }, { "epoch": 5.011647254575707, "loss": 0.8639097809791565, "loss_ce": 0.000384394807042554, "loss_iou": 0.2734375, "loss_num": 0.06396484375, "loss_xval": 0.86328125, "num_input_tokens_seen": 94375952, "step": 1506 }, { "epoch": 5.014975041597338, "grad_norm": 15.622085571289062, "learning_rate": 5e-06, "loss": 0.8583, "num_input_tokens_seen": 94440520, "step": 1507 }, { "epoch": 5.014975041597338, "loss": 1.238585114479065, "loss_ce": 0.0032335498835891485, "loss_iou": 0.46484375, "loss_num": 0.06103515625, "loss_xval": 1.234375, "num_input_tokens_seen": 94440520, "step": 1507 }, { "epoch": 5.018302828618968, "grad_norm": 31.38747787475586, "learning_rate": 5e-06, "loss": 0.7085, "num_input_tokens_seen": 94500620, "step": 1508 }, { "epoch": 5.018302828618968, "loss": 0.5411911010742188, "loss_ce": 0.00041960144881159067, "loss_iou": 0.1875, "loss_num": 0.033203125, "loss_xval": 0.5390625, "num_input_tokens_seen": 94500620, "step": 1508 }, { "epoch": 5.021630615640599, "grad_norm": 24.40467643737793, "learning_rate": 5e-06, "loss": 0.755, "num_input_tokens_seen": 94563172, "step": 1509 }, { "epoch": 5.021630615640599, "loss": 0.886728048324585, "loss_ce": 0.0011079427786171436, "loss_iou": 0.29296875, "loss_num": 0.059814453125, "loss_xval": 0.88671875, "num_input_tokens_seen": 94563172, "step": 1509 }, { "epoch": 5.02495840266223, "grad_norm": 28.08826446533203, "learning_rate": 5e-06, "loss": 0.7189, "num_input_tokens_seen": 94626456, "step": 1510 }, { "epoch": 5.02495840266223, "loss": 0.5464110374450684, "loss_ce": 0.0007567618158645928, "loss_iou": 0.20703125, "loss_num": 0.0262451171875, "loss_xval": 0.546875, "num_input_tokens_seen": 94626456, "step": 1510 }, { "epoch": 5.02828618968386, "grad_norm": 32.060081481933594, "learning_rate": 5e-06, "loss": 0.6385, "num_input_tokens_seen": 94689628, "step": 1511 }, { "epoch": 5.02828618968386, "loss": 0.6361727714538574, "loss_ce": 0.00018641223141457886, "loss_iou": 0.2275390625, "loss_num": 0.0361328125, "loss_xval": 0.63671875, "num_input_tokens_seen": 94689628, "step": 1511 }, { "epoch": 5.031613976705491, "grad_norm": 28.850717544555664, "learning_rate": 5e-06, "loss": 0.6486, "num_input_tokens_seen": 94752952, "step": 1512 }, { "epoch": 5.031613976705491, "loss": 0.6386792659759521, "loss_ce": 7.388218818960013e-06, "loss_iou": 0.2373046875, "loss_num": 0.032958984375, "loss_xval": 0.640625, "num_input_tokens_seen": 94752952, "step": 1512 }, { "epoch": 5.034941763727121, "grad_norm": 26.582441329956055, "learning_rate": 5e-06, "loss": 0.7219, "num_input_tokens_seen": 94816220, "step": 1513 }, { "epoch": 5.034941763727121, "loss": 0.8012294173240662, "loss_ce": 0.0014247273793444037, "loss_iou": 0.28515625, "loss_num": 0.046142578125, "loss_xval": 0.80078125, "num_input_tokens_seen": 94816220, "step": 1513 }, { "epoch": 5.038269550748752, "grad_norm": 13.041583061218262, "learning_rate": 5e-06, "loss": 0.7812, "num_input_tokens_seen": 94878556, "step": 1514 }, { "epoch": 5.038269550748752, "loss": 0.8982586860656738, "loss_ce": 6.535501597682014e-05, "loss_iou": 0.26953125, "loss_num": 0.07177734375, "loss_xval": 0.8984375, "num_input_tokens_seen": 94878556, "step": 1514 }, { "epoch": 5.041597337770383, "grad_norm": 41.85260009765625, "learning_rate": 5e-06, "loss": 0.6035, "num_input_tokens_seen": 94940876, "step": 1515 }, { "epoch": 5.041597337770383, "loss": 0.6592752933502197, "loss_ce": 0.001194267300888896, "loss_iou": 0.26171875, "loss_num": 0.027099609375, "loss_xval": 0.65625, "num_input_tokens_seen": 94940876, "step": 1515 }, { "epoch": 5.044925124792013, "grad_norm": 33.620452880859375, "learning_rate": 5e-06, "loss": 0.8496, "num_input_tokens_seen": 95004780, "step": 1516 }, { "epoch": 5.044925124792013, "loss": 1.0527219772338867, "loss_ce": 0.000597875623498112, "loss_iou": 0.345703125, "loss_num": 0.07275390625, "loss_xval": 1.0546875, "num_input_tokens_seen": 95004780, "step": 1516 }, { "epoch": 5.048252911813644, "grad_norm": 14.673892974853516, "learning_rate": 5e-06, "loss": 0.5555, "num_input_tokens_seen": 95067216, "step": 1517 }, { "epoch": 5.048252911813644, "loss": 0.41764336824417114, "loss_ce": 0.0001018413677229546, "loss_iou": 0.150390625, "loss_num": 0.0233154296875, "loss_xval": 0.41796875, "num_input_tokens_seen": 95067216, "step": 1517 }, { "epoch": 5.0515806988352745, "grad_norm": 10.968241691589355, "learning_rate": 5e-06, "loss": 0.6427, "num_input_tokens_seen": 95129008, "step": 1518 }, { "epoch": 5.0515806988352745, "loss": 0.5915021896362305, "loss_ce": 0.0006818820256739855, "loss_iou": 0.1943359375, "loss_num": 0.04052734375, "loss_xval": 0.58984375, "num_input_tokens_seen": 95129008, "step": 1518 }, { "epoch": 5.054908485856905, "grad_norm": 26.52812957763672, "learning_rate": 5e-06, "loss": 0.7537, "num_input_tokens_seen": 95192232, "step": 1519 }, { "epoch": 5.054908485856905, "loss": 0.903398871421814, "loss_ce": 0.0005668269586749375, "loss_iou": 0.330078125, "loss_num": 0.048828125, "loss_xval": 0.90234375, "num_input_tokens_seen": 95192232, "step": 1519 }, { "epoch": 5.058236272878536, "grad_norm": 30.482860565185547, "learning_rate": 5e-06, "loss": 0.8095, "num_input_tokens_seen": 95256580, "step": 1520 }, { "epoch": 5.058236272878536, "loss": 0.7557247877120972, "loss_ce": 0.0013303017476573586, "loss_iou": 0.259765625, "loss_num": 0.047119140625, "loss_xval": 0.75390625, "num_input_tokens_seen": 95256580, "step": 1520 }, { "epoch": 5.061564059900166, "grad_norm": 39.979736328125, "learning_rate": 5e-06, "loss": 0.6808, "num_input_tokens_seen": 95318464, "step": 1521 }, { "epoch": 5.061564059900166, "loss": 0.7826014161109924, "loss_ce": 8.651560165162664e-06, "loss_iou": 0.310546875, "loss_num": 0.03271484375, "loss_xval": 0.78125, "num_input_tokens_seen": 95318464, "step": 1521 }, { "epoch": 5.064891846921797, "grad_norm": 14.766712188720703, "learning_rate": 5e-06, "loss": 0.5086, "num_input_tokens_seen": 95379756, "step": 1522 }, { "epoch": 5.064891846921797, "loss": 0.5393363237380981, "loss_ce": 0.0009147044620476663, "loss_iou": 0.20703125, "loss_num": 0.025146484375, "loss_xval": 0.5390625, "num_input_tokens_seen": 95379756, "step": 1522 }, { "epoch": 5.068219633943428, "grad_norm": 7.251678466796875, "learning_rate": 5e-06, "loss": 0.4442, "num_input_tokens_seen": 95441884, "step": 1523 }, { "epoch": 5.068219633943428, "loss": 0.4630827307701111, "loss_ce": 0.00031416656565852463, "loss_iou": 0.10546875, "loss_num": 0.050537109375, "loss_xval": 0.462890625, "num_input_tokens_seen": 95441884, "step": 1523 }, { "epoch": 5.071547420965058, "grad_norm": 6.830024242401123, "learning_rate": 5e-06, "loss": 0.961, "num_input_tokens_seen": 95505028, "step": 1524 }, { "epoch": 5.071547420965058, "loss": 0.8202039003372192, "loss_ce": 0.0006237871712073684, "loss_iou": 0.298828125, "loss_num": 0.0439453125, "loss_xval": 0.8203125, "num_input_tokens_seen": 95505028, "step": 1524 }, { "epoch": 5.074875207986689, "grad_norm": 21.969810485839844, "learning_rate": 5e-06, "loss": 1.14, "num_input_tokens_seen": 95568468, "step": 1525 }, { "epoch": 5.074875207986689, "loss": 0.9151379466056824, "loss_ce": 0.0004040507774334401, "loss_iou": 0.32421875, "loss_num": 0.053466796875, "loss_xval": 0.9140625, "num_input_tokens_seen": 95568468, "step": 1525 }, { "epoch": 5.078202995008319, "grad_norm": 57.692718505859375, "learning_rate": 5e-06, "loss": 0.642, "num_input_tokens_seen": 95631624, "step": 1526 }, { "epoch": 5.078202995008319, "loss": 0.7038395404815674, "loss_ce": 0.00010419228055980057, "loss_iou": 0.25390625, "loss_num": 0.039306640625, "loss_xval": 0.703125, "num_input_tokens_seen": 95631624, "step": 1526 }, { "epoch": 5.08153078202995, "grad_norm": 10.670079231262207, "learning_rate": 5e-06, "loss": 0.8521, "num_input_tokens_seen": 95694840, "step": 1527 }, { "epoch": 5.08153078202995, "loss": 0.6557031273841858, "loss_ce": 0.0011621195590123534, "loss_iou": 0.26171875, "loss_num": 0.0262451171875, "loss_xval": 0.65625, "num_input_tokens_seen": 95694840, "step": 1527 }, { "epoch": 5.084858569051581, "grad_norm": 14.62645435333252, "learning_rate": 5e-06, "loss": 0.734, "num_input_tokens_seen": 95757612, "step": 1528 }, { "epoch": 5.084858569051581, "loss": 0.6704707145690918, "loss_ce": 0.0006709391018375754, "loss_iou": 0.2255859375, "loss_num": 0.043701171875, "loss_xval": 0.66796875, "num_input_tokens_seen": 95757612, "step": 1528 }, { "epoch": 5.088186356073211, "grad_norm": 16.821863174438477, "learning_rate": 5e-06, "loss": 0.9306, "num_input_tokens_seen": 95821208, "step": 1529 }, { "epoch": 5.088186356073211, "loss": 1.184844970703125, "loss_ce": 3.0552084353985265e-05, "loss_iou": 0.390625, "loss_num": 0.0810546875, "loss_xval": 1.1875, "num_input_tokens_seen": 95821208, "step": 1529 }, { "epoch": 5.091514143094842, "grad_norm": 79.41419982910156, "learning_rate": 5e-06, "loss": 0.7217, "num_input_tokens_seen": 95883876, "step": 1530 }, { "epoch": 5.091514143094842, "loss": 0.8382284641265869, "loss_ce": 0.0009482197929173708, "loss_iou": 0.333984375, "loss_num": 0.033447265625, "loss_xval": 0.8359375, "num_input_tokens_seen": 95883876, "step": 1530 }, { "epoch": 5.0948419301164725, "grad_norm": 25.368114471435547, "learning_rate": 5e-06, "loss": 0.6514, "num_input_tokens_seen": 95947076, "step": 1531 }, { "epoch": 5.0948419301164725, "loss": 0.7189469337463379, "loss_ce": 1.380766843794845e-05, "loss_iou": 0.2373046875, "loss_num": 0.048828125, "loss_xval": 0.71875, "num_input_tokens_seen": 95947076, "step": 1531 }, { "epoch": 5.098169717138103, "grad_norm": 13.117644309997559, "learning_rate": 5e-06, "loss": 0.8008, "num_input_tokens_seen": 96009676, "step": 1532 }, { "epoch": 5.098169717138103, "loss": 0.8573014736175537, "loss_ce": 0.00012374338984955102, "loss_iou": 0.234375, "loss_num": 0.07763671875, "loss_xval": 0.85546875, "num_input_tokens_seen": 96009676, "step": 1532 }, { "epoch": 5.101497504159734, "grad_norm": 10.874475479125977, "learning_rate": 5e-06, "loss": 0.376, "num_input_tokens_seen": 96071412, "step": 1533 }, { "epoch": 5.101497504159734, "loss": 0.4256836473941803, "loss_ce": 2.4467857656418346e-05, "loss_iou": 0.154296875, "loss_num": 0.0233154296875, "loss_xval": 0.42578125, "num_input_tokens_seen": 96071412, "step": 1533 }, { "epoch": 5.104825291181364, "grad_norm": 15.079007148742676, "learning_rate": 5e-06, "loss": 0.5986, "num_input_tokens_seen": 96133696, "step": 1534 }, { "epoch": 5.104825291181364, "loss": 0.5522220134735107, "loss_ce": 0.001440738094970584, "loss_iou": 0.1865234375, "loss_num": 0.03564453125, "loss_xval": 0.55078125, "num_input_tokens_seen": 96133696, "step": 1534 }, { "epoch": 5.108153078202995, "grad_norm": 12.708340644836426, "learning_rate": 5e-06, "loss": 0.7919, "num_input_tokens_seen": 96197404, "step": 1535 }, { "epoch": 5.108153078202995, "loss": 0.9537239670753479, "loss_ce": 0.001331356936134398, "loss_iou": 0.349609375, "loss_num": 0.051025390625, "loss_xval": 0.953125, "num_input_tokens_seen": 96197404, "step": 1535 }, { "epoch": 5.1114808652246255, "grad_norm": 9.230545043945312, "learning_rate": 5e-06, "loss": 0.534, "num_input_tokens_seen": 96260424, "step": 1536 }, { "epoch": 5.1114808652246255, "loss": 0.5380446314811707, "loss_ce": 0.0006911091622896492, "loss_iou": 0.177734375, "loss_num": 0.0361328125, "loss_xval": 0.5390625, "num_input_tokens_seen": 96260424, "step": 1536 }, { "epoch": 5.114808652246256, "grad_norm": 7.485297203063965, "learning_rate": 5e-06, "loss": 0.6481, "num_input_tokens_seen": 96324052, "step": 1537 }, { "epoch": 5.114808652246256, "loss": 0.8440206050872803, "loss_ce": 0.00014855540939606726, "loss_iou": 0.296875, "loss_num": 0.05029296875, "loss_xval": 0.84375, "num_input_tokens_seen": 96324052, "step": 1537 }, { "epoch": 5.118136439267887, "grad_norm": 12.548988342285156, "learning_rate": 5e-06, "loss": 0.7396, "num_input_tokens_seen": 96386140, "step": 1538 }, { "epoch": 5.118136439267887, "loss": 0.907487154006958, "loss_ce": 1.645735392230563e-05, "loss_iou": 0.30859375, "loss_num": 0.05810546875, "loss_xval": 0.90625, "num_input_tokens_seen": 96386140, "step": 1538 }, { "epoch": 5.121464226289517, "grad_norm": 9.125192642211914, "learning_rate": 5e-06, "loss": 0.5798, "num_input_tokens_seen": 96445552, "step": 1539 }, { "epoch": 5.121464226289517, "loss": 0.5199623703956604, "loss_ce": 3.884044417645782e-06, "loss_iou": 0.1630859375, "loss_num": 0.0390625, "loss_xval": 0.51953125, "num_input_tokens_seen": 96445552, "step": 1539 }, { "epoch": 5.124792013311148, "grad_norm": 9.516877174377441, "learning_rate": 5e-06, "loss": 0.4506, "num_input_tokens_seen": 96508364, "step": 1540 }, { "epoch": 5.124792013311148, "loss": 0.5682762265205383, "loss_ce": 0.00016097842308226973, "loss_iou": 0.2353515625, "loss_num": 0.0196533203125, "loss_xval": 0.56640625, "num_input_tokens_seen": 96508364, "step": 1540 }, { "epoch": 5.128119800332779, "grad_norm": 13.660243034362793, "learning_rate": 5e-06, "loss": 0.7161, "num_input_tokens_seen": 96571852, "step": 1541 }, { "epoch": 5.128119800332779, "loss": 0.8170957565307617, "loss_ce": 0.002154330490157008, "loss_iou": 0.279296875, "loss_num": 0.051025390625, "loss_xval": 0.81640625, "num_input_tokens_seen": 96571852, "step": 1541 }, { "epoch": 5.131447587354409, "grad_norm": 12.244997024536133, "learning_rate": 5e-06, "loss": 0.8757, "num_input_tokens_seen": 96637356, "step": 1542 }, { "epoch": 5.131447587354409, "loss": 0.7130774259567261, "loss_ce": 0.001651703380048275, "loss_iou": 0.25390625, "loss_num": 0.040771484375, "loss_xval": 0.7109375, "num_input_tokens_seen": 96637356, "step": 1542 }, { "epoch": 5.13477537437604, "grad_norm": 10.925747871398926, "learning_rate": 5e-06, "loss": 0.6949, "num_input_tokens_seen": 96698348, "step": 1543 }, { "epoch": 5.13477537437604, "loss": 0.516825795173645, "loss_ce": 0.000468335987534374, "loss_iou": 0.16796875, "loss_num": 0.035888671875, "loss_xval": 0.515625, "num_input_tokens_seen": 96698348, "step": 1543 }, { "epoch": 5.13810316139767, "grad_norm": 16.20490074157715, "learning_rate": 5e-06, "loss": 0.8087, "num_input_tokens_seen": 96759328, "step": 1544 }, { "epoch": 5.13810316139767, "loss": 0.8088467121124268, "loss_ce": 8.813009117147885e-06, "loss_iou": 0.28125, "loss_num": 0.049560546875, "loss_xval": 0.80859375, "num_input_tokens_seen": 96759328, "step": 1544 }, { "epoch": 5.141430948419301, "grad_norm": 13.553998947143555, "learning_rate": 5e-06, "loss": 0.9186, "num_input_tokens_seen": 96823196, "step": 1545 }, { "epoch": 5.141430948419301, "loss": 0.8833928108215332, "loss_ce": 0.0019231259357184172, "loss_iou": 0.3515625, "loss_num": 0.035400390625, "loss_xval": 0.8828125, "num_input_tokens_seen": 96823196, "step": 1545 }, { "epoch": 5.144758735440932, "grad_norm": 16.64641571044922, "learning_rate": 5e-06, "loss": 0.5477, "num_input_tokens_seen": 96886160, "step": 1546 }, { "epoch": 5.144758735440932, "loss": 0.5232193470001221, "loss_ce": 2.6001296646427363e-05, "loss_iou": 0.1533203125, "loss_num": 0.043212890625, "loss_xval": 0.5234375, "num_input_tokens_seen": 96886160, "step": 1546 }, { "epoch": 5.148086522462562, "grad_norm": 12.320746421813965, "learning_rate": 5e-06, "loss": 0.8047, "num_input_tokens_seen": 96949040, "step": 1547 }, { "epoch": 5.148086522462562, "loss": 0.7521053552627563, "loss_ce": 3.014335743500851e-05, "loss_iou": 0.251953125, "loss_num": 0.04931640625, "loss_xval": 0.75390625, "num_input_tokens_seen": 96949040, "step": 1547 }, { "epoch": 5.151414309484193, "grad_norm": 8.744440078735352, "learning_rate": 5e-06, "loss": 0.8747, "num_input_tokens_seen": 97011576, "step": 1548 }, { "epoch": 5.151414309484193, "loss": 0.8263041973114014, "loss_ce": 0.0001323087781202048, "loss_iou": 0.30078125, "loss_num": 0.045166015625, "loss_xval": 0.828125, "num_input_tokens_seen": 97011576, "step": 1548 }, { "epoch": 5.1547420965058235, "grad_norm": 8.398408889770508, "learning_rate": 5e-06, "loss": 0.6888, "num_input_tokens_seen": 97073700, "step": 1549 }, { "epoch": 5.1547420965058235, "loss": 0.6298340559005737, "loss_ce": 0.0016601935494691133, "loss_iou": 0.2109375, "loss_num": 0.041259765625, "loss_xval": 0.62890625, "num_input_tokens_seen": 97073700, "step": 1549 }, { "epoch": 5.158069883527454, "grad_norm": 13.537564277648926, "learning_rate": 5e-06, "loss": 0.703, "num_input_tokens_seen": 97137488, "step": 1550 }, { "epoch": 5.158069883527454, "loss": 0.791496992111206, "loss_ce": 0.0006034679245203733, "loss_iou": 0.31640625, "loss_num": 0.03125, "loss_xval": 0.7890625, "num_input_tokens_seen": 97137488, "step": 1550 }, { "epoch": 5.161397670549085, "grad_norm": 35.76241683959961, "learning_rate": 5e-06, "loss": 0.7945, "num_input_tokens_seen": 97198004, "step": 1551 }, { "epoch": 5.161397670549085, "loss": 0.7911405563354492, "loss_ce": 2.8721469789161347e-06, "loss_iou": 0.267578125, "loss_num": 0.05078125, "loss_xval": 0.79296875, "num_input_tokens_seen": 97198004, "step": 1551 }, { "epoch": 5.164725457570715, "grad_norm": 14.66862964630127, "learning_rate": 5e-06, "loss": 0.6592, "num_input_tokens_seen": 97260336, "step": 1552 }, { "epoch": 5.164725457570715, "loss": 0.5582370162010193, "loss_ce": 0.00025361799634993076, "loss_iou": 0.1865234375, "loss_num": 0.036865234375, "loss_xval": 0.55859375, "num_input_tokens_seen": 97260336, "step": 1552 }, { "epoch": 5.168053244592346, "grad_norm": 8.499744415283203, "learning_rate": 5e-06, "loss": 0.5118, "num_input_tokens_seen": 97322608, "step": 1553 }, { "epoch": 5.168053244592346, "loss": 0.5915481448173523, "loss_ce": 0.0005447297007776797, "loss_iou": 0.2265625, "loss_num": 0.027587890625, "loss_xval": 0.58984375, "num_input_tokens_seen": 97322608, "step": 1553 }, { "epoch": 5.1713810316139766, "grad_norm": 13.193866729736328, "learning_rate": 5e-06, "loss": 0.5383, "num_input_tokens_seen": 97385600, "step": 1554 }, { "epoch": 5.1713810316139766, "loss": 0.35793358087539673, "loss_ce": 2.3437820345861837e-05, "loss_iou": 0.095703125, "loss_num": 0.033203125, "loss_xval": 0.357421875, "num_input_tokens_seen": 97385600, "step": 1554 }, { "epoch": 5.174708818635607, "grad_norm": 19.962928771972656, "learning_rate": 5e-06, "loss": 0.6914, "num_input_tokens_seen": 97447764, "step": 1555 }, { "epoch": 5.174708818635607, "loss": 0.6495640277862549, "loss_ce": 0.0005161368753761053, "loss_iou": 0.208984375, "loss_num": 0.046142578125, "loss_xval": 0.6484375, "num_input_tokens_seen": 97447764, "step": 1555 }, { "epoch": 5.178036605657238, "grad_norm": 15.584821701049805, "learning_rate": 5e-06, "loss": 0.737, "num_input_tokens_seen": 97510692, "step": 1556 }, { "epoch": 5.178036605657238, "loss": 0.7587304711341858, "loss_ce": 0.0005517329555004835, "loss_iou": 0.28515625, "loss_num": 0.037353515625, "loss_xval": 0.7578125, "num_input_tokens_seen": 97510692, "step": 1556 }, { "epoch": 5.181364392678868, "grad_norm": 16.34670066833496, "learning_rate": 5e-06, "loss": 1.0405, "num_input_tokens_seen": 97573088, "step": 1557 }, { "epoch": 5.181364392678868, "loss": 1.0706849098205566, "loss_ce": 0.00012822201824747026, "loss_iou": 0.33203125, "loss_num": 0.08154296875, "loss_xval": 1.0703125, "num_input_tokens_seen": 97573088, "step": 1557 }, { "epoch": 5.184692179700499, "grad_norm": 24.74776840209961, "learning_rate": 5e-06, "loss": 0.7951, "num_input_tokens_seen": 97636512, "step": 1558 }, { "epoch": 5.184692179700499, "loss": 0.9686456918716431, "loss_ce": 1.7786769603844732e-05, "loss_iou": 0.35546875, "loss_num": 0.0517578125, "loss_xval": 0.96875, "num_input_tokens_seen": 97636512, "step": 1558 }, { "epoch": 5.18801996672213, "grad_norm": 8.181451797485352, "learning_rate": 5e-06, "loss": 0.7342, "num_input_tokens_seen": 97699408, "step": 1559 }, { "epoch": 5.18801996672213, "loss": 0.6913556456565857, "loss_ce": 0.0006818252149969339, "loss_iou": 0.296875, "loss_num": 0.0196533203125, "loss_xval": 0.69140625, "num_input_tokens_seen": 97699408, "step": 1559 }, { "epoch": 5.19134775374376, "grad_norm": 9.973201751708984, "learning_rate": 5e-06, "loss": 0.7311, "num_input_tokens_seen": 97761664, "step": 1560 }, { "epoch": 5.19134775374376, "loss": 0.5677616596221924, "loss_ce": 1.2609091754711699e-05, "loss_iou": 0.1689453125, "loss_num": 0.046142578125, "loss_xval": 0.56640625, "num_input_tokens_seen": 97761664, "step": 1560 }, { "epoch": 5.194675540765391, "grad_norm": 13.940975189208984, "learning_rate": 5e-06, "loss": 0.5023, "num_input_tokens_seen": 97823884, "step": 1561 }, { "epoch": 5.194675540765391, "loss": 0.5250530242919922, "loss_ce": 2.8577667762874626e-05, "loss_iou": 0.1650390625, "loss_num": 0.039306640625, "loss_xval": 0.5234375, "num_input_tokens_seen": 97823884, "step": 1561 }, { "epoch": 5.1980033277870215, "grad_norm": 10.562603950500488, "learning_rate": 5e-06, "loss": 0.6535, "num_input_tokens_seen": 97886020, "step": 1562 }, { "epoch": 5.1980033277870215, "loss": 0.6427997350692749, "loss_ce": 0.0007098691421560943, "loss_iou": 0.23046875, "loss_num": 0.036376953125, "loss_xval": 0.640625, "num_input_tokens_seen": 97886020, "step": 1562 }, { "epoch": 5.201331114808652, "grad_norm": 20.083114624023438, "learning_rate": 5e-06, "loss": 0.6058, "num_input_tokens_seen": 97947632, "step": 1563 }, { "epoch": 5.201331114808652, "loss": 0.564403772354126, "loss_ce": 0.0003778576210606843, "loss_iou": 0.1953125, "loss_num": 0.034912109375, "loss_xval": 0.5625, "num_input_tokens_seen": 97947632, "step": 1563 }, { "epoch": 5.204658901830283, "grad_norm": 34.775455474853516, "learning_rate": 5e-06, "loss": 0.8295, "num_input_tokens_seen": 98010084, "step": 1564 }, { "epoch": 5.204658901830283, "loss": 0.667966902256012, "loss_ce": 5.918982424191199e-05, "loss_iou": 0.25390625, "loss_num": 0.032470703125, "loss_xval": 0.66796875, "num_input_tokens_seen": 98010084, "step": 1564 }, { "epoch": 5.207986688851913, "grad_norm": 14.078563690185547, "learning_rate": 5e-06, "loss": 0.5324, "num_input_tokens_seen": 98072816, "step": 1565 }, { "epoch": 5.207986688851913, "loss": 0.29302480816841125, "loss_ce": 0.00048331127618439496, "loss_iou": 0.0693359375, "loss_num": 0.03076171875, "loss_xval": 0.29296875, "num_input_tokens_seen": 98072816, "step": 1565 }, { "epoch": 5.211314475873544, "grad_norm": 12.865399360656738, "learning_rate": 5e-06, "loss": 0.6898, "num_input_tokens_seen": 98135732, "step": 1566 }, { "epoch": 5.211314475873544, "loss": 0.6527900695800781, "loss_ce": 0.0006904720212332904, "loss_iou": 0.1826171875, "loss_num": 0.057373046875, "loss_xval": 0.65234375, "num_input_tokens_seen": 98135732, "step": 1566 }, { "epoch": 5.2146422628951745, "grad_norm": 30.85588836669922, "learning_rate": 5e-06, "loss": 0.7293, "num_input_tokens_seen": 98200008, "step": 1567 }, { "epoch": 5.2146422628951745, "loss": 0.7736085057258606, "loss_ce": 0.00041512242751196027, "loss_iou": 0.30078125, "loss_num": 0.0341796875, "loss_xval": 0.7734375, "num_input_tokens_seen": 98200008, "step": 1567 }, { "epoch": 5.217970049916805, "grad_norm": 29.402868270874023, "learning_rate": 5e-06, "loss": 0.8229, "num_input_tokens_seen": 98264616, "step": 1568 }, { "epoch": 5.217970049916805, "loss": 0.63280189037323, "loss_ce": 0.00072178163100034, "loss_iou": 0.2294921875, "loss_num": 0.034423828125, "loss_xval": 0.6328125, "num_input_tokens_seen": 98264616, "step": 1568 }, { "epoch": 5.221297836938436, "grad_norm": 20.66412925720215, "learning_rate": 5e-06, "loss": 0.6474, "num_input_tokens_seen": 98327500, "step": 1569 }, { "epoch": 5.221297836938436, "loss": 0.8841537833213806, "loss_ce": 0.00024261744692921638, "loss_iou": 0.322265625, "loss_num": 0.048095703125, "loss_xval": 0.8828125, "num_input_tokens_seen": 98327500, "step": 1569 }, { "epoch": 5.224625623960066, "grad_norm": 7.796050071716309, "learning_rate": 5e-06, "loss": 0.6683, "num_input_tokens_seen": 98390560, "step": 1570 }, { "epoch": 5.224625623960066, "loss": 0.6554022431373596, "loss_ce": 6.712816229992313e-06, "loss_iou": 0.26953125, "loss_num": 0.0230712890625, "loss_xval": 0.65625, "num_input_tokens_seen": 98390560, "step": 1570 }, { "epoch": 5.227953410981697, "grad_norm": 22.678808212280273, "learning_rate": 5e-06, "loss": 0.743, "num_input_tokens_seen": 98451556, "step": 1571 }, { "epoch": 5.227953410981697, "loss": 0.8813857436180115, "loss_ce": 3.806907989201136e-05, "loss_iou": 0.318359375, "loss_num": 0.048828125, "loss_xval": 0.8828125, "num_input_tokens_seen": 98451556, "step": 1571 }, { "epoch": 5.231281198003328, "grad_norm": 41.750709533691406, "learning_rate": 5e-06, "loss": 1.0732, "num_input_tokens_seen": 98514720, "step": 1572 }, { "epoch": 5.231281198003328, "loss": 1.1592261791229248, "loss_ce": 0.0010230218758806586, "loss_iou": 0.3515625, "loss_num": 0.0908203125, "loss_xval": 1.15625, "num_input_tokens_seen": 98514720, "step": 1572 }, { "epoch": 5.234608985024958, "grad_norm": 45.86041259765625, "learning_rate": 5e-06, "loss": 0.7024, "num_input_tokens_seen": 98578124, "step": 1573 }, { "epoch": 5.234608985024958, "loss": 0.7642949819564819, "loss_ce": 0.0006078842561691999, "loss_iou": 0.267578125, "loss_num": 0.0458984375, "loss_xval": 0.765625, "num_input_tokens_seen": 98578124, "step": 1573 }, { "epoch": 5.237936772046589, "grad_norm": 13.958196640014648, "learning_rate": 5e-06, "loss": 0.8009, "num_input_tokens_seen": 98642292, "step": 1574 }, { "epoch": 5.237936772046589, "loss": 0.6747276782989502, "loss_ce": 0.0016319530550390482, "loss_iou": 0.1943359375, "loss_num": 0.056884765625, "loss_xval": 0.671875, "num_input_tokens_seen": 98642292, "step": 1574 }, { "epoch": 5.241264559068219, "grad_norm": 1243.618896484375, "learning_rate": 5e-06, "loss": 0.774, "num_input_tokens_seen": 98704420, "step": 1575 }, { "epoch": 5.241264559068219, "loss": 0.8179287314414978, "loss_ce": 0.0005458915256895125, "loss_iou": 0.279296875, "loss_num": 0.0517578125, "loss_xval": 0.81640625, "num_input_tokens_seen": 98704420, "step": 1575 }, { "epoch": 5.24459234608985, "grad_norm": 10.360309600830078, "learning_rate": 5e-06, "loss": 0.7435, "num_input_tokens_seen": 98769476, "step": 1576 }, { "epoch": 5.24459234608985, "loss": 0.7698988914489746, "loss_ce": 0.0002455537032801658, "loss_iou": 0.302734375, "loss_num": 0.03271484375, "loss_xval": 0.76953125, "num_input_tokens_seen": 98769476, "step": 1576 }, { "epoch": 5.247920133111481, "grad_norm": 7.8587565422058105, "learning_rate": 5e-06, "loss": 0.4282, "num_input_tokens_seen": 98830068, "step": 1577 }, { "epoch": 5.247920133111481, "loss": 0.5417072772979736, "loss_ce": 2.0255403796909377e-05, "loss_iou": 0.18359375, "loss_num": 0.03515625, "loss_xval": 0.54296875, "num_input_tokens_seen": 98830068, "step": 1577 }, { "epoch": 5.251247920133111, "grad_norm": 8.830144882202148, "learning_rate": 5e-06, "loss": 0.7409, "num_input_tokens_seen": 98892736, "step": 1578 }, { "epoch": 5.251247920133111, "loss": 0.6595664620399475, "loss_ce": 2.0516697986749932e-05, "loss_iou": 0.2431640625, "loss_num": 0.03466796875, "loss_xval": 0.66015625, "num_input_tokens_seen": 98892736, "step": 1578 }, { "epoch": 5.254575707154742, "grad_norm": 15.969278335571289, "learning_rate": 5e-06, "loss": 0.6557, "num_input_tokens_seen": 98956260, "step": 1579 }, { "epoch": 5.254575707154742, "loss": 0.4240087568759918, "loss_ce": 0.0003026947088073939, "loss_iou": 0.1484375, "loss_num": 0.0252685546875, "loss_xval": 0.423828125, "num_input_tokens_seen": 98956260, "step": 1579 }, { "epoch": 5.2579034941763725, "grad_norm": 12.759567260742188, "learning_rate": 5e-06, "loss": 0.5799, "num_input_tokens_seen": 99017636, "step": 1580 }, { "epoch": 5.2579034941763725, "loss": 0.4847814440727234, "loss_ce": 0.001138861756771803, "loss_iou": 0.1572265625, "loss_num": 0.03369140625, "loss_xval": 0.484375, "num_input_tokens_seen": 99017636, "step": 1580 }, { "epoch": 5.261231281198003, "grad_norm": 7.065427780151367, "learning_rate": 5e-06, "loss": 0.6962, "num_input_tokens_seen": 99081064, "step": 1581 }, { "epoch": 5.261231281198003, "loss": 0.7337771654129028, "loss_ce": 0.0007449376862496138, "loss_iou": 0.25, "loss_num": 0.046142578125, "loss_xval": 0.734375, "num_input_tokens_seen": 99081064, "step": 1581 }, { "epoch": 5.264559068219634, "grad_norm": 12.841547012329102, "learning_rate": 5e-06, "loss": 0.7063, "num_input_tokens_seen": 99143616, "step": 1582 }, { "epoch": 5.264559068219634, "loss": 0.6888269186019897, "loss_ce": 0.00022827113571111113, "loss_iou": 0.271484375, "loss_num": 0.0286865234375, "loss_xval": 0.6875, "num_input_tokens_seen": 99143616, "step": 1582 }, { "epoch": 5.267886855241264, "grad_norm": 21.898542404174805, "learning_rate": 5e-06, "loss": 0.7441, "num_input_tokens_seen": 99206620, "step": 1583 }, { "epoch": 5.267886855241264, "loss": 1.0425233840942383, "loss_ce": 0.00040918824379332364, "loss_iou": 0.408203125, "loss_num": 0.044921875, "loss_xval": 1.0390625, "num_input_tokens_seen": 99206620, "step": 1583 }, { "epoch": 5.271214642262895, "grad_norm": 18.596426010131836, "learning_rate": 5e-06, "loss": 0.7307, "num_input_tokens_seen": 99270064, "step": 1584 }, { "epoch": 5.271214642262895, "loss": 0.7801717519760132, "loss_ce": 0.00038661700091324747, "loss_iou": 0.314453125, "loss_num": 0.030517578125, "loss_xval": 0.78125, "num_input_tokens_seen": 99270064, "step": 1584 }, { "epoch": 5.2745424292845255, "grad_norm": 23.57819366455078, "learning_rate": 5e-06, "loss": 0.8169, "num_input_tokens_seen": 99333904, "step": 1585 }, { "epoch": 5.2745424292845255, "loss": 0.5337511897087097, "loss_ce": 0.0007311389781534672, "loss_iou": 0.1923828125, "loss_num": 0.029541015625, "loss_xval": 0.53125, "num_input_tokens_seen": 99333904, "step": 1585 }, { "epoch": 5.277870216306156, "grad_norm": 37.10131072998047, "learning_rate": 5e-06, "loss": 0.7919, "num_input_tokens_seen": 99398088, "step": 1586 }, { "epoch": 5.277870216306156, "loss": 0.803447961807251, "loss_ce": 0.0004694595991168171, "loss_iou": 0.328125, "loss_num": 0.02978515625, "loss_xval": 0.8046875, "num_input_tokens_seen": 99398088, "step": 1586 }, { "epoch": 5.281198003327787, "grad_norm": 19.156675338745117, "learning_rate": 5e-06, "loss": 0.6396, "num_input_tokens_seen": 99459368, "step": 1587 }, { "epoch": 5.281198003327787, "loss": 0.9905081391334534, "loss_ce": 2.9618571716127917e-05, "loss_iou": 0.328125, "loss_num": 0.06640625, "loss_xval": 0.9921875, "num_input_tokens_seen": 99459368, "step": 1587 }, { "epoch": 5.284525790349417, "grad_norm": 8.31318473815918, "learning_rate": 5e-06, "loss": 0.4815, "num_input_tokens_seen": 99520824, "step": 1588 }, { "epoch": 5.284525790349417, "loss": 0.33191707730293274, "loss_ce": 0.0004961899248883128, "loss_iou": 0.09375, "loss_num": 0.0286865234375, "loss_xval": 0.33203125, "num_input_tokens_seen": 99520824, "step": 1588 }, { "epoch": 5.287853577371048, "grad_norm": 9.956439971923828, "learning_rate": 5e-06, "loss": 0.6581, "num_input_tokens_seen": 99581932, "step": 1589 }, { "epoch": 5.287853577371048, "loss": 0.7131649255752563, "loss_ce": 3.017848575836979e-05, "loss_iou": 0.26953125, "loss_num": 0.034912109375, "loss_xval": 0.71484375, "num_input_tokens_seen": 99581932, "step": 1589 }, { "epoch": 5.291181364392679, "grad_norm": 7.714879512786865, "learning_rate": 5e-06, "loss": 0.7462, "num_input_tokens_seen": 99644652, "step": 1590 }, { "epoch": 5.291181364392679, "loss": 0.7490200996398926, "loss_ce": 0.0003017770650330931, "loss_iou": 0.259765625, "loss_num": 0.0458984375, "loss_xval": 0.75, "num_input_tokens_seen": 99644652, "step": 1590 }, { "epoch": 5.294509151414309, "grad_norm": 28.146652221679688, "learning_rate": 5e-06, "loss": 0.6564, "num_input_tokens_seen": 99708500, "step": 1591 }, { "epoch": 5.294509151414309, "loss": 0.8539837598800659, "loss_ce": 0.0013836309080943465, "loss_iou": 0.3515625, "loss_num": 0.030029296875, "loss_xval": 0.8515625, "num_input_tokens_seen": 99708500, "step": 1591 }, { "epoch": 5.29783693843594, "grad_norm": 19.461881637573242, "learning_rate": 5e-06, "loss": 0.6438, "num_input_tokens_seen": 99771208, "step": 1592 }, { "epoch": 5.29783693843594, "loss": 0.6133031845092773, "loss_ce": 2.1985513740219176e-05, "loss_iou": 0.2080078125, "loss_num": 0.039306640625, "loss_xval": 0.61328125, "num_input_tokens_seen": 99771208, "step": 1592 }, { "epoch": 5.3011647254575704, "grad_norm": 26.864931106567383, "learning_rate": 5e-06, "loss": 0.5504, "num_input_tokens_seen": 99833760, "step": 1593 }, { "epoch": 5.3011647254575704, "loss": 0.5138239860534668, "loss_ce": 0.0006403637235052884, "loss_iou": 0.1865234375, "loss_num": 0.028076171875, "loss_xval": 0.51171875, "num_input_tokens_seen": 99833760, "step": 1593 }, { "epoch": 5.304492512479201, "grad_norm": 8.363646507263184, "learning_rate": 5e-06, "loss": 0.7576, "num_input_tokens_seen": 99897804, "step": 1594 }, { "epoch": 5.304492512479201, "loss": 0.7315871715545654, "loss_ce": 0.0003860450815409422, "loss_iou": 0.2041015625, "loss_num": 0.06494140625, "loss_xval": 0.73046875, "num_input_tokens_seen": 99897804, "step": 1594 }, { "epoch": 5.307820299500832, "grad_norm": 69.57489013671875, "learning_rate": 5e-06, "loss": 0.5305, "num_input_tokens_seen": 99960196, "step": 1595 }, { "epoch": 5.307820299500832, "loss": 0.6942192912101746, "loss_ce": 0.00018853397341445088, "loss_iou": 0.28515625, "loss_num": 0.0245361328125, "loss_xval": 0.6953125, "num_input_tokens_seen": 99960196, "step": 1595 }, { "epoch": 5.311148086522462, "grad_norm": 11.756884574890137, "learning_rate": 5e-06, "loss": 0.6475, "num_input_tokens_seen": 100022584, "step": 1596 }, { "epoch": 5.311148086522462, "loss": 0.7136390805244446, "loss_ce": 1.6043655705288984e-05, "loss_iou": 0.2099609375, "loss_num": 0.058837890625, "loss_xval": 0.71484375, "num_input_tokens_seen": 100022584, "step": 1596 }, { "epoch": 5.314475873544093, "grad_norm": 8.837007522583008, "learning_rate": 5e-06, "loss": 0.6514, "num_input_tokens_seen": 100086124, "step": 1597 }, { "epoch": 5.314475873544093, "loss": 0.6199404001235962, "loss_ce": 0.0003114750434178859, "loss_iou": 0.2216796875, "loss_num": 0.035400390625, "loss_xval": 0.62109375, "num_input_tokens_seen": 100086124, "step": 1597 }, { "epoch": 5.3178036605657235, "grad_norm": 8.616264343261719, "learning_rate": 5e-06, "loss": 0.7488, "num_input_tokens_seen": 100148952, "step": 1598 }, { "epoch": 5.3178036605657235, "loss": 0.45825719833374023, "loss_ce": 5.2268542276578955e-06, "loss_iou": 0.1591796875, "loss_num": 0.02783203125, "loss_xval": 0.458984375, "num_input_tokens_seen": 100148952, "step": 1598 }, { "epoch": 5.321131447587354, "grad_norm": 12.141873359680176, "learning_rate": 5e-06, "loss": 0.5747, "num_input_tokens_seen": 100210792, "step": 1599 }, { "epoch": 5.321131447587354, "loss": 0.4171640872955322, "loss_ce": 0.0004160656244494021, "loss_iou": 0.1513671875, "loss_num": 0.02294921875, "loss_xval": 0.416015625, "num_input_tokens_seen": 100210792, "step": 1599 }, { "epoch": 5.324459234608985, "grad_norm": 17.452781677246094, "learning_rate": 5e-06, "loss": 0.5113, "num_input_tokens_seen": 100272944, "step": 1600 }, { "epoch": 5.324459234608985, "loss": 0.5932729840278625, "loss_ce": 0.0009878204436972737, "loss_iou": 0.22265625, "loss_num": 0.0294189453125, "loss_xval": 0.59375, "num_input_tokens_seen": 100272944, "step": 1600 }, { "epoch": 5.327787021630615, "grad_norm": 13.12947940826416, "learning_rate": 5e-06, "loss": 0.4211, "num_input_tokens_seen": 100336000, "step": 1601 }, { "epoch": 5.327787021630615, "loss": 0.34248068928718567, "loss_ce": 0.00013448798563331366, "loss_iou": 0.12255859375, "loss_num": 0.0194091796875, "loss_xval": 0.341796875, "num_input_tokens_seen": 100336000, "step": 1601 }, { "epoch": 5.331114808652246, "grad_norm": 16.778797149658203, "learning_rate": 5e-06, "loss": 0.9173, "num_input_tokens_seen": 100398688, "step": 1602 }, { "epoch": 5.331114808652246, "loss": 0.812654972076416, "loss_ce": 0.00015499009168706834, "loss_iou": 0.3203125, "loss_num": 0.034912109375, "loss_xval": 0.8125, "num_input_tokens_seen": 100398688, "step": 1602 }, { "epoch": 5.334442595673877, "grad_norm": 41.81510543823242, "learning_rate": 5e-06, "loss": 0.7542, "num_input_tokens_seen": 100461044, "step": 1603 }, { "epoch": 5.334442595673877, "loss": 0.9035730361938477, "loss_ce": 8.593749953433871e-06, "loss_iou": 0.279296875, "loss_num": 0.06884765625, "loss_xval": 0.90234375, "num_input_tokens_seen": 100461044, "step": 1603 }, { "epoch": 5.337770382695507, "grad_norm": 27.792001724243164, "learning_rate": 5e-06, "loss": 0.6624, "num_input_tokens_seen": 100524272, "step": 1604 }, { "epoch": 5.337770382695507, "loss": 0.4387521743774414, "loss_ce": 3.14875396725256e-05, "loss_iou": 0.158203125, "loss_num": 0.0244140625, "loss_xval": 0.439453125, "num_input_tokens_seen": 100524272, "step": 1604 }, { "epoch": 5.341098169717138, "grad_norm": 10.232015609741211, "learning_rate": 5e-06, "loss": 0.6419, "num_input_tokens_seen": 100586896, "step": 1605 }, { "epoch": 5.341098169717138, "loss": 0.7033101320266724, "loss_ce": 0.00018513888062443584, "loss_iou": 0.26171875, "loss_num": 0.035888671875, "loss_xval": 0.703125, "num_input_tokens_seen": 100586896, "step": 1605 }, { "epoch": 5.344425956738768, "grad_norm": 9.116806983947754, "learning_rate": 5e-06, "loss": 0.5147, "num_input_tokens_seen": 100649132, "step": 1606 }, { "epoch": 5.344425956738768, "loss": 0.42553964257240295, "loss_ce": 2.5271147023886442e-06, "loss_iou": 0.146484375, "loss_num": 0.0264892578125, "loss_xval": 0.42578125, "num_input_tokens_seen": 100649132, "step": 1606 }, { "epoch": 5.347753743760399, "grad_norm": 24.01177978515625, "learning_rate": 5e-06, "loss": 0.7176, "num_input_tokens_seen": 100712640, "step": 1607 }, { "epoch": 5.347753743760399, "loss": 0.8379149436950684, "loss_ce": 0.0006346513982862234, "loss_iou": 0.302734375, "loss_num": 0.046630859375, "loss_xval": 0.8359375, "num_input_tokens_seen": 100712640, "step": 1607 }, { "epoch": 5.35108153078203, "grad_norm": 17.368818283081055, "learning_rate": 5e-06, "loss": 0.6702, "num_input_tokens_seen": 100775344, "step": 1608 }, { "epoch": 5.35108153078203, "loss": 0.8547390699386597, "loss_ce": 2.7412108920543687e-06, "loss_iou": 0.326171875, "loss_num": 0.040771484375, "loss_xval": 0.85546875, "num_input_tokens_seen": 100775344, "step": 1608 }, { "epoch": 5.35440931780366, "grad_norm": 19.727764129638672, "learning_rate": 5e-06, "loss": 0.7125, "num_input_tokens_seen": 100839532, "step": 1609 }, { "epoch": 5.35440931780366, "loss": 0.8060716390609741, "loss_ce": 0.0013841248583048582, "loss_iou": 0.287109375, "loss_num": 0.04638671875, "loss_xval": 0.8046875, "num_input_tokens_seen": 100839532, "step": 1609 }, { "epoch": 5.357737104825291, "grad_norm": 14.09434700012207, "learning_rate": 5e-06, "loss": 0.6749, "num_input_tokens_seen": 100903348, "step": 1610 }, { "epoch": 5.357737104825291, "loss": 0.7217329740524292, "loss_ce": 0.0001753973338054493, "loss_iou": 0.2578125, "loss_num": 0.04150390625, "loss_xval": 0.72265625, "num_input_tokens_seen": 100903348, "step": 1610 }, { "epoch": 5.3610648918469215, "grad_norm": 36.83820343017578, "learning_rate": 5e-06, "loss": 0.5817, "num_input_tokens_seen": 100964912, "step": 1611 }, { "epoch": 5.3610648918469215, "loss": 0.6560474634170532, "loss_ce": 4.160653043072671e-05, "loss_iou": 0.216796875, "loss_num": 0.04443359375, "loss_xval": 0.65625, "num_input_tokens_seen": 100964912, "step": 1611 }, { "epoch": 5.364392678868552, "grad_norm": 11.305191993713379, "learning_rate": 5e-06, "loss": 0.4619, "num_input_tokens_seen": 101026424, "step": 1612 }, { "epoch": 5.364392678868552, "loss": 0.6465030908584595, "loss_ce": 1.8739890947472304e-05, "loss_iou": 0.2373046875, "loss_num": 0.03466796875, "loss_xval": 0.6484375, "num_input_tokens_seen": 101026424, "step": 1612 }, { "epoch": 5.367720465890183, "grad_norm": 11.862913131713867, "learning_rate": 5e-06, "loss": 0.8137, "num_input_tokens_seen": 101090924, "step": 1613 }, { "epoch": 5.367720465890183, "loss": 0.8782895803451538, "loss_ce": 0.00023779345792718232, "loss_iou": 0.32421875, "loss_num": 0.046142578125, "loss_xval": 0.87890625, "num_input_tokens_seen": 101090924, "step": 1613 }, { "epoch": 5.371048252911813, "grad_norm": 18.821861267089844, "learning_rate": 5e-06, "loss": 0.3774, "num_input_tokens_seen": 101153632, "step": 1614 }, { "epoch": 5.371048252911813, "loss": 0.4713307321071625, "loss_ce": 1.724835419736337e-05, "loss_iou": 0.1875, "loss_num": 0.019287109375, "loss_xval": 0.470703125, "num_input_tokens_seen": 101153632, "step": 1614 }, { "epoch": 5.374376039933444, "grad_norm": 18.81785774230957, "learning_rate": 5e-06, "loss": 0.4723, "num_input_tokens_seen": 101216952, "step": 1615 }, { "epoch": 5.374376039933444, "loss": 0.5333555936813354, "loss_ce": 0.0013731429353356361, "loss_iou": 0.169921875, "loss_num": 0.0380859375, "loss_xval": 0.53125, "num_input_tokens_seen": 101216952, "step": 1615 }, { "epoch": 5.3777038269550745, "grad_norm": 8.743084907531738, "learning_rate": 5e-06, "loss": 0.5347, "num_input_tokens_seen": 101278632, "step": 1616 }, { "epoch": 5.3777038269550745, "loss": 0.5145453214645386, "loss_ce": 0.00014100028784014285, "loss_iou": 0.1455078125, "loss_num": 0.044677734375, "loss_xval": 0.515625, "num_input_tokens_seen": 101278632, "step": 1616 }, { "epoch": 5.381031613976705, "grad_norm": 15.498371124267578, "learning_rate": 5e-06, "loss": 0.8427, "num_input_tokens_seen": 101341908, "step": 1617 }, { "epoch": 5.381031613976705, "loss": 0.5787339210510254, "loss_ce": 0.00012060911831213161, "loss_iou": 0.2216796875, "loss_num": 0.027099609375, "loss_xval": 0.578125, "num_input_tokens_seen": 101341908, "step": 1617 }, { "epoch": 5.384359400998336, "grad_norm": 25.164121627807617, "learning_rate": 5e-06, "loss": 0.5582, "num_input_tokens_seen": 101403536, "step": 1618 }, { "epoch": 5.384359400998336, "loss": 0.4660763144493103, "loss_ce": 1.1841300874948502e-05, "loss_iou": 0.1357421875, "loss_num": 0.038818359375, "loss_xval": 0.466796875, "num_input_tokens_seen": 101403536, "step": 1618 }, { "epoch": 5.387687188019966, "grad_norm": 10.2332763671875, "learning_rate": 5e-06, "loss": 0.6704, "num_input_tokens_seen": 101467088, "step": 1619 }, { "epoch": 5.387687188019966, "loss": 0.490899920463562, "loss_ce": 0.00042139904689975083, "loss_iou": 0.1962890625, "loss_num": 0.019775390625, "loss_xval": 0.490234375, "num_input_tokens_seen": 101467088, "step": 1619 }, { "epoch": 5.391014975041597, "grad_norm": 19.87059783935547, "learning_rate": 5e-06, "loss": 0.5354, "num_input_tokens_seen": 101529148, "step": 1620 }, { "epoch": 5.391014975041597, "loss": 0.5794316530227661, "loss_ce": 0.0011235260171815753, "loss_iou": 0.1826171875, "loss_num": 0.042724609375, "loss_xval": 0.578125, "num_input_tokens_seen": 101529148, "step": 1620 }, { "epoch": 5.394342762063228, "grad_norm": 11.307812690734863, "learning_rate": 5e-06, "loss": 0.6216, "num_input_tokens_seen": 101592284, "step": 1621 }, { "epoch": 5.394342762063228, "loss": 0.6232434511184692, "loss_ce": 0.0006848212797194719, "loss_iou": 0.228515625, "loss_num": 0.033203125, "loss_xval": 0.62109375, "num_input_tokens_seen": 101592284, "step": 1621 }, { "epoch": 5.397670549084858, "grad_norm": 22.18927001953125, "learning_rate": 5e-06, "loss": 0.6218, "num_input_tokens_seen": 101654680, "step": 1622 }, { "epoch": 5.397670549084858, "loss": 0.717785120010376, "loss_ce": 0.0004999724333174527, "loss_iou": 0.234375, "loss_num": 0.0498046875, "loss_xval": 0.71875, "num_input_tokens_seen": 101654680, "step": 1622 }, { "epoch": 5.400998336106489, "grad_norm": 14.127787590026855, "learning_rate": 5e-06, "loss": 0.7036, "num_input_tokens_seen": 101717188, "step": 1623 }, { "epoch": 5.400998336106489, "loss": 0.8181177377700806, "loss_ce": 0.00036871584597975016, "loss_iou": 0.28125, "loss_num": 0.05078125, "loss_xval": 0.81640625, "num_input_tokens_seen": 101717188, "step": 1623 }, { "epoch": 5.404326123128119, "grad_norm": 15.851911544799805, "learning_rate": 5e-06, "loss": 0.7727, "num_input_tokens_seen": 101780468, "step": 1624 }, { "epoch": 5.404326123128119, "loss": 0.8187301158905029, "loss_ce": 0.00018765513959806412, "loss_iou": 0.34375, "loss_num": 0.0263671875, "loss_xval": 0.8203125, "num_input_tokens_seen": 101780468, "step": 1624 }, { "epoch": 5.40765391014975, "grad_norm": 7.549576759338379, "learning_rate": 5e-06, "loss": 0.4095, "num_input_tokens_seen": 101842308, "step": 1625 }, { "epoch": 5.40765391014975, "loss": 0.38657066226005554, "loss_ce": 0.0008284934447146952, "loss_iou": 0.08984375, "loss_num": 0.041015625, "loss_xval": 0.38671875, "num_input_tokens_seen": 101842308, "step": 1625 }, { "epoch": 5.410981697171381, "grad_norm": 9.882723808288574, "learning_rate": 5e-06, "loss": 0.6379, "num_input_tokens_seen": 101903744, "step": 1626 }, { "epoch": 5.410981697171381, "loss": 0.6620615720748901, "loss_ce": 0.0009287626016885042, "loss_iou": 0.25, "loss_num": 0.0322265625, "loss_xval": 0.66015625, "num_input_tokens_seen": 101903744, "step": 1626 }, { "epoch": 5.414309484193011, "grad_norm": 14.373669624328613, "learning_rate": 5e-06, "loss": 0.8154, "num_input_tokens_seen": 101967284, "step": 1627 }, { "epoch": 5.414309484193011, "loss": 0.6620774865150452, "loss_ce": 0.0007005495717749, "loss_iou": 0.2412109375, "loss_num": 0.03564453125, "loss_xval": 0.66015625, "num_input_tokens_seen": 101967284, "step": 1627 }, { "epoch": 5.417637271214642, "grad_norm": 18.5496768951416, "learning_rate": 5e-06, "loss": 0.6312, "num_input_tokens_seen": 102029524, "step": 1628 }, { "epoch": 5.417637271214642, "loss": 0.7796862721443176, "loss_ce": 0.0016101216897368431, "loss_iou": 0.279296875, "loss_num": 0.043701171875, "loss_xval": 0.77734375, "num_input_tokens_seen": 102029524, "step": 1628 }, { "epoch": 5.4209650582362725, "grad_norm": 14.205424308776855, "learning_rate": 5e-06, "loss": 0.5817, "num_input_tokens_seen": 102093696, "step": 1629 }, { "epoch": 5.4209650582362725, "loss": 0.5077414512634277, "loss_ce": 0.00017305587243754417, "loss_iou": 0.1845703125, "loss_num": 0.027587890625, "loss_xval": 0.5078125, "num_input_tokens_seen": 102093696, "step": 1629 }, { "epoch": 5.424292845257903, "grad_norm": 9.155461311340332, "learning_rate": 5e-06, "loss": 0.5884, "num_input_tokens_seen": 102155176, "step": 1630 }, { "epoch": 5.424292845257903, "loss": 0.5432171821594238, "loss_ce": 0.0003704904520418495, "loss_iou": 0.1826171875, "loss_num": 0.035400390625, "loss_xval": 0.54296875, "num_input_tokens_seen": 102155176, "step": 1630 }, { "epoch": 5.427620632279534, "grad_norm": 8.620030403137207, "learning_rate": 5e-06, "loss": 0.6747, "num_input_tokens_seen": 102217376, "step": 1631 }, { "epoch": 5.427620632279534, "loss": 0.6632170081138611, "loss_ce": 9.006463187688496e-06, "loss_iou": 0.212890625, "loss_num": 0.047119140625, "loss_xval": 0.6640625, "num_input_tokens_seen": 102217376, "step": 1631 }, { "epoch": 5.430948419301164, "grad_norm": 10.027673721313477, "learning_rate": 5e-06, "loss": 0.5378, "num_input_tokens_seen": 102281180, "step": 1632 }, { "epoch": 5.430948419301164, "loss": 0.47565510869026184, "loss_ce": 0.00019121626974083483, "loss_iou": 0.1591796875, "loss_num": 0.031494140625, "loss_xval": 0.474609375, "num_input_tokens_seen": 102281180, "step": 1632 }, { "epoch": 5.434276206322795, "grad_norm": 52.82054901123047, "learning_rate": 5e-06, "loss": 0.8202, "num_input_tokens_seen": 102343984, "step": 1633 }, { "epoch": 5.434276206322795, "loss": 0.6174899935722351, "loss_ce": 0.0004245634190738201, "loss_iou": 0.1689453125, "loss_num": 0.055908203125, "loss_xval": 0.6171875, "num_input_tokens_seen": 102343984, "step": 1633 }, { "epoch": 5.437603993344426, "grad_norm": 13.176911354064941, "learning_rate": 5e-06, "loss": 0.9236, "num_input_tokens_seen": 102407440, "step": 1634 }, { "epoch": 5.437603993344426, "loss": 0.7702251076698303, "loss_ce": 0.0005717647145502269, "loss_iou": 0.2353515625, "loss_num": 0.06005859375, "loss_xval": 0.76953125, "num_input_tokens_seen": 102407440, "step": 1634 }, { "epoch": 5.440931780366056, "grad_norm": 10.518148422241211, "learning_rate": 5e-06, "loss": 0.7139, "num_input_tokens_seen": 102470212, "step": 1635 }, { "epoch": 5.440931780366056, "loss": 0.7326650619506836, "loss_ce": 0.0007314276299439371, "loss_iou": 0.22265625, "loss_num": 0.056884765625, "loss_xval": 0.73046875, "num_input_tokens_seen": 102470212, "step": 1635 }, { "epoch": 5.444259567387687, "grad_norm": 15.578624725341797, "learning_rate": 5e-06, "loss": 0.8894, "num_input_tokens_seen": 102534180, "step": 1636 }, { "epoch": 5.444259567387687, "loss": 1.045713186264038, "loss_ce": 0.0006692318129353225, "loss_iou": 0.33984375, "loss_num": 0.07275390625, "loss_xval": 1.046875, "num_input_tokens_seen": 102534180, "step": 1636 }, { "epoch": 5.447587354409317, "grad_norm": 11.064337730407715, "learning_rate": 5e-06, "loss": 0.6611, "num_input_tokens_seen": 102596244, "step": 1637 }, { "epoch": 5.447587354409317, "loss": 0.8054574728012085, "loss_ce": 0.00015959216398186982, "loss_iou": 0.2578125, "loss_num": 0.05810546875, "loss_xval": 0.8046875, "num_input_tokens_seen": 102596244, "step": 1637 }, { "epoch": 5.450915141430948, "grad_norm": 33.97342300415039, "learning_rate": 5e-06, "loss": 0.6283, "num_input_tokens_seen": 102658532, "step": 1638 }, { "epoch": 5.450915141430948, "loss": 0.6672518849372864, "loss_ce": 0.0009921238524839282, "loss_iou": 0.263671875, "loss_num": 0.0281982421875, "loss_xval": 0.66796875, "num_input_tokens_seen": 102658532, "step": 1638 }, { "epoch": 5.454242928452579, "grad_norm": 8.071794509887695, "learning_rate": 5e-06, "loss": 0.6823, "num_input_tokens_seen": 102720076, "step": 1639 }, { "epoch": 5.454242928452579, "loss": 0.8720945715904236, "loss_ce": 2.4301192752318457e-05, "loss_iou": 0.298828125, "loss_num": 0.054443359375, "loss_xval": 0.87109375, "num_input_tokens_seen": 102720076, "step": 1639 }, { "epoch": 5.457570715474209, "grad_norm": 9.842574119567871, "learning_rate": 5e-06, "loss": 0.7563, "num_input_tokens_seen": 102782416, "step": 1640 }, { "epoch": 5.457570715474209, "loss": 0.6718653440475464, "loss_ce": 0.0009668688289821148, "loss_iou": 0.232421875, "loss_num": 0.041259765625, "loss_xval": 0.671875, "num_input_tokens_seen": 102782416, "step": 1640 }, { "epoch": 5.46089850249584, "grad_norm": 16.268909454345703, "learning_rate": 5e-06, "loss": 0.7966, "num_input_tokens_seen": 102845652, "step": 1641 }, { "epoch": 5.46089850249584, "loss": 0.8888282775878906, "loss_ce": 0.0006446776678785682, "loss_iou": 0.341796875, "loss_num": 0.04150390625, "loss_xval": 0.88671875, "num_input_tokens_seen": 102845652, "step": 1641 }, { "epoch": 5.4642262895174705, "grad_norm": 14.76564884185791, "learning_rate": 5e-06, "loss": 0.7013, "num_input_tokens_seen": 102906500, "step": 1642 }, { "epoch": 5.4642262895174705, "loss": 0.7110059261322021, "loss_ce": 0.0001294732792302966, "loss_iou": 0.1591796875, "loss_num": 0.07861328125, "loss_xval": 0.7109375, "num_input_tokens_seen": 102906500, "step": 1642 }, { "epoch": 5.467554076539101, "grad_norm": 19.5927791595459, "learning_rate": 5e-06, "loss": 0.5605, "num_input_tokens_seen": 102969944, "step": 1643 }, { "epoch": 5.467554076539101, "loss": 0.7769869565963745, "loss_ce": 9.381456948176492e-06, "loss_iou": 0.296875, "loss_num": 0.036865234375, "loss_xval": 0.77734375, "num_input_tokens_seen": 102969944, "step": 1643 }, { "epoch": 5.470881863560733, "grad_norm": 16.91885757446289, "learning_rate": 5e-06, "loss": 0.7235, "num_input_tokens_seen": 103033352, "step": 1644 }, { "epoch": 5.470881863560733, "loss": 0.531306803226471, "loss_ce": 0.0009113232372328639, "loss_iou": 0.1767578125, "loss_num": 0.03564453125, "loss_xval": 0.53125, "num_input_tokens_seen": 103033352, "step": 1644 }, { "epoch": 5.474209650582363, "grad_norm": 16.343990325927734, "learning_rate": 5e-06, "loss": 0.7818, "num_input_tokens_seen": 103097296, "step": 1645 }, { "epoch": 5.474209650582363, "loss": 1.0094261169433594, "loss_ce": 0.0006371331983245909, "loss_iou": 0.357421875, "loss_num": 0.058837890625, "loss_xval": 1.0078125, "num_input_tokens_seen": 103097296, "step": 1645 }, { "epoch": 5.477537437603994, "grad_norm": 7.790471076965332, "learning_rate": 5e-06, "loss": 0.5469, "num_input_tokens_seen": 103159720, "step": 1646 }, { "epoch": 5.477537437603994, "loss": 0.3706333041191101, "loss_ce": 2.7853264327859506e-05, "loss_iou": 0.1318359375, "loss_num": 0.021484375, "loss_xval": 0.37109375, "num_input_tokens_seen": 103159720, "step": 1646 }, { "epoch": 5.480865224625624, "grad_norm": 37.15055847167969, "learning_rate": 5e-06, "loss": 0.7048, "num_input_tokens_seen": 103222660, "step": 1647 }, { "epoch": 5.480865224625624, "loss": 0.6658989787101746, "loss_ce": 0.0004937172634527087, "loss_iou": 0.24609375, "loss_num": 0.03466796875, "loss_xval": 0.6640625, "num_input_tokens_seen": 103222660, "step": 1647 }, { "epoch": 5.484193011647255, "grad_norm": 27.953083038330078, "learning_rate": 5e-06, "loss": 0.8436, "num_input_tokens_seen": 103286316, "step": 1648 }, { "epoch": 5.484193011647255, "loss": 0.8316751718521118, "loss_ce": 1.0130894224857911e-05, "loss_iou": 0.259765625, "loss_num": 0.0625, "loss_xval": 0.83203125, "num_input_tokens_seen": 103286316, "step": 1648 }, { "epoch": 5.487520798668886, "grad_norm": 20.30833625793457, "learning_rate": 5e-06, "loss": 0.6182, "num_input_tokens_seen": 103349840, "step": 1649 }, { "epoch": 5.487520798668886, "loss": 0.5982181429862976, "loss_ce": 1.259712917089928e-05, "loss_iou": 0.2353515625, "loss_num": 0.025390625, "loss_xval": 0.59765625, "num_input_tokens_seen": 103349840, "step": 1649 }, { "epoch": 5.490848585690516, "grad_norm": 20.08171272277832, "learning_rate": 5e-06, "loss": 0.5541, "num_input_tokens_seen": 103413016, "step": 1650 }, { "epoch": 5.490848585690516, "loss": 0.7060139775276184, "loss_ce": 0.0004475625464692712, "loss_iou": 0.287109375, "loss_num": 0.02587890625, "loss_xval": 0.70703125, "num_input_tokens_seen": 103413016, "step": 1650 }, { "epoch": 5.494176372712147, "grad_norm": 8.847830772399902, "learning_rate": 5e-06, "loss": 0.7636, "num_input_tokens_seen": 103477308, "step": 1651 }, { "epoch": 5.494176372712147, "loss": 0.9405819773674011, "loss_ce": 0.0003964377101510763, "loss_iou": 0.3671875, "loss_num": 0.041259765625, "loss_xval": 0.94140625, "num_input_tokens_seen": 103477308, "step": 1651 }, { "epoch": 5.4975041597337775, "grad_norm": 13.779448509216309, "learning_rate": 5e-06, "loss": 0.835, "num_input_tokens_seen": 103539928, "step": 1652 }, { "epoch": 5.4975041597337775, "loss": 1.0582094192504883, "loss_ce": 0.0008363999659195542, "loss_iou": 0.326171875, "loss_num": 0.0810546875, "loss_xval": 1.0546875, "num_input_tokens_seen": 103539928, "step": 1652 }, { "epoch": 5.500831946755408, "grad_norm": 21.17045021057129, "learning_rate": 5e-06, "loss": 0.6832, "num_input_tokens_seen": 103602172, "step": 1653 }, { "epoch": 5.500831946755408, "loss": 0.42666590213775635, "loss_ce": 0.00039636611472815275, "loss_iou": 0.1689453125, "loss_num": 0.0174560546875, "loss_xval": 0.42578125, "num_input_tokens_seen": 103602172, "step": 1653 }, { "epoch": 5.504159733777039, "grad_norm": 20.944114685058594, "learning_rate": 5e-06, "loss": 0.6127, "num_input_tokens_seen": 103663200, "step": 1654 }, { "epoch": 5.504159733777039, "loss": 0.7828108668327332, "loss_ce": 9.606579260434955e-05, "loss_iou": 0.263671875, "loss_num": 0.051025390625, "loss_xval": 0.78125, "num_input_tokens_seen": 103663200, "step": 1654 }, { "epoch": 5.507487520798669, "grad_norm": 8.944890022277832, "learning_rate": 5e-06, "loss": 0.6528, "num_input_tokens_seen": 103726100, "step": 1655 }, { "epoch": 5.507487520798669, "loss": 0.6580873727798462, "loss_ce": 0.00012836034875363111, "loss_iou": 0.25390625, "loss_num": 0.0303955078125, "loss_xval": 0.65625, "num_input_tokens_seen": 103726100, "step": 1655 }, { "epoch": 5.5108153078203, "grad_norm": 11.251640319824219, "learning_rate": 5e-06, "loss": 0.7174, "num_input_tokens_seen": 103788604, "step": 1656 }, { "epoch": 5.5108153078203, "loss": 0.6210998296737671, "loss_ce": 6.068248694646172e-06, "loss_iou": 0.2060546875, "loss_num": 0.041748046875, "loss_xval": 0.62109375, "num_input_tokens_seen": 103788604, "step": 1656 }, { "epoch": 5.5141430948419305, "grad_norm": 18.31918716430664, "learning_rate": 5e-06, "loss": 0.6866, "num_input_tokens_seen": 103851268, "step": 1657 }, { "epoch": 5.5141430948419305, "loss": 0.780825138092041, "loss_ce": 0.0007958478527143598, "loss_iou": 0.236328125, "loss_num": 0.0615234375, "loss_xval": 0.78125, "num_input_tokens_seen": 103851268, "step": 1657 }, { "epoch": 5.517470881863561, "grad_norm": 17.294458389282227, "learning_rate": 5e-06, "loss": 0.4707, "num_input_tokens_seen": 103912924, "step": 1658 }, { "epoch": 5.517470881863561, "loss": 0.45224249362945557, "loss_ce": 0.0002466700680088252, "loss_iou": 0.1630859375, "loss_num": 0.025390625, "loss_xval": 0.451171875, "num_input_tokens_seen": 103912924, "step": 1658 }, { "epoch": 5.520798668885192, "grad_norm": 8.51252269744873, "learning_rate": 5e-06, "loss": 0.6149, "num_input_tokens_seen": 103975188, "step": 1659 }, { "epoch": 5.520798668885192, "loss": 0.6649562120437622, "loss_ce": 0.00040545634692534804, "loss_iou": 0.2138671875, "loss_num": 0.047607421875, "loss_xval": 0.6640625, "num_input_tokens_seen": 103975188, "step": 1659 }, { "epoch": 5.524126455906822, "grad_norm": 15.996707916259766, "learning_rate": 5e-06, "loss": 0.7851, "num_input_tokens_seen": 104037616, "step": 1660 }, { "epoch": 5.524126455906822, "loss": 0.8272863626480103, "loss_ce": 1.5841425920370966e-05, "loss_iou": 0.27734375, "loss_num": 0.054443359375, "loss_xval": 0.828125, "num_input_tokens_seen": 104037616, "step": 1660 }, { "epoch": 5.527454242928453, "grad_norm": 12.95364761352539, "learning_rate": 5e-06, "loss": 0.5927, "num_input_tokens_seen": 104099448, "step": 1661 }, { "epoch": 5.527454242928453, "loss": 0.4973302185535431, "loss_ce": 0.0005956004024483263, "loss_iou": 0.1328125, "loss_num": 0.04638671875, "loss_xval": 0.49609375, "num_input_tokens_seen": 104099448, "step": 1661 }, { "epoch": 5.530782029950084, "grad_norm": 11.21883773803711, "learning_rate": 5e-06, "loss": 0.6192, "num_input_tokens_seen": 104161152, "step": 1662 }, { "epoch": 5.530782029950084, "loss": 0.8068508505821228, "loss_ce": 0.0006985386717133224, "loss_iou": 0.328125, "loss_num": 0.0302734375, "loss_xval": 0.8046875, "num_input_tokens_seen": 104161152, "step": 1662 }, { "epoch": 5.534109816971714, "grad_norm": 13.665806770324707, "learning_rate": 5e-06, "loss": 0.7915, "num_input_tokens_seen": 104224144, "step": 1663 }, { "epoch": 5.534109816971714, "loss": 0.9511141180992126, "loss_ce": 0.00043049981468357146, "loss_iou": 0.365234375, "loss_num": 0.044189453125, "loss_xval": 0.94921875, "num_input_tokens_seen": 104224144, "step": 1663 }, { "epoch": 5.537437603993345, "grad_norm": 8.892619132995605, "learning_rate": 5e-06, "loss": 0.8575, "num_input_tokens_seen": 104287548, "step": 1664 }, { "epoch": 5.537437603993345, "loss": 0.8013952970504761, "loss_ce": 0.001346514793112874, "loss_iou": 0.318359375, "loss_num": 0.03271484375, "loss_xval": 0.80078125, "num_input_tokens_seen": 104287548, "step": 1664 }, { "epoch": 5.5407653910149754, "grad_norm": 8.533040046691895, "learning_rate": 5e-06, "loss": 0.6726, "num_input_tokens_seen": 104348436, "step": 1665 }, { "epoch": 5.5407653910149754, "loss": 0.4962202310562134, "loss_ce": 4.4194543988851365e-06, "loss_iou": 0.1328125, "loss_num": 0.046142578125, "loss_xval": 0.49609375, "num_input_tokens_seen": 104348436, "step": 1665 }, { "epoch": 5.544093178036606, "grad_norm": 15.838689804077148, "learning_rate": 5e-06, "loss": 0.7047, "num_input_tokens_seen": 104411136, "step": 1666 }, { "epoch": 5.544093178036606, "loss": 0.7474393844604492, "loss_ce": 2.877732640627073e-06, "loss_iou": 0.234375, "loss_num": 0.055908203125, "loss_xval": 0.74609375, "num_input_tokens_seen": 104411136, "step": 1666 }, { "epoch": 5.547420965058237, "grad_norm": 13.48023796081543, "learning_rate": 5e-06, "loss": 0.54, "num_input_tokens_seen": 104473460, "step": 1667 }, { "epoch": 5.547420965058237, "loss": 0.4172057807445526, "loss_ce": 0.0005798295023851097, "loss_iou": 0.1630859375, "loss_num": 0.0181884765625, "loss_xval": 0.416015625, "num_input_tokens_seen": 104473460, "step": 1667 }, { "epoch": 5.550748752079867, "grad_norm": 13.00550651550293, "learning_rate": 5e-06, "loss": 0.5776, "num_input_tokens_seen": 104535100, "step": 1668 }, { "epoch": 5.550748752079867, "loss": 0.5370232462882996, "loss_ce": 0.00028008728986606, "loss_iou": 0.1533203125, "loss_num": 0.046142578125, "loss_xval": 0.53515625, "num_input_tokens_seen": 104535100, "step": 1668 }, { "epoch": 5.554076539101498, "grad_norm": 25.64752769470215, "learning_rate": 5e-06, "loss": 0.5463, "num_input_tokens_seen": 104597888, "step": 1669 }, { "epoch": 5.554076539101498, "loss": 0.4867459237575531, "loss_ce": 0.0007840264588594437, "loss_iou": 0.1708984375, "loss_num": 0.02880859375, "loss_xval": 0.486328125, "num_input_tokens_seen": 104597888, "step": 1669 }, { "epoch": 5.5574043261231285, "grad_norm": 11.490925788879395, "learning_rate": 5e-06, "loss": 0.6501, "num_input_tokens_seen": 104659476, "step": 1670 }, { "epoch": 5.5574043261231285, "loss": 0.45913708209991455, "loss_ce": 3.0645875085610896e-05, "loss_iou": 0.1064453125, "loss_num": 0.049072265625, "loss_xval": 0.458984375, "num_input_tokens_seen": 104659476, "step": 1670 }, { "epoch": 5.560732113144759, "grad_norm": 11.503711700439453, "learning_rate": 5e-06, "loss": 0.7049, "num_input_tokens_seen": 104723016, "step": 1671 }, { "epoch": 5.560732113144759, "loss": 0.8446140289306641, "loss_ce": 0.0008640355663374066, "loss_iou": 0.314453125, "loss_num": 0.04345703125, "loss_xval": 0.84375, "num_input_tokens_seen": 104723016, "step": 1671 }, { "epoch": 5.56405990016639, "grad_norm": 8.733305931091309, "learning_rate": 5e-06, "loss": 0.5664, "num_input_tokens_seen": 104784856, "step": 1672 }, { "epoch": 5.56405990016639, "loss": 0.4931206703186035, "loss_ce": 0.0008110986091196537, "loss_iou": 0.1845703125, "loss_num": 0.024658203125, "loss_xval": 0.4921875, "num_input_tokens_seen": 104784856, "step": 1672 }, { "epoch": 5.56738768718802, "grad_norm": 9.786175727844238, "learning_rate": 5e-06, "loss": 0.846, "num_input_tokens_seen": 104848380, "step": 1673 }, { "epoch": 5.56738768718802, "loss": 0.7731995582580566, "loss_ce": 6.221240255399607e-06, "loss_iou": 0.2470703125, "loss_num": 0.0556640625, "loss_xval": 0.7734375, "num_input_tokens_seen": 104848380, "step": 1673 }, { "epoch": 5.570715474209651, "grad_norm": 13.724798202514648, "learning_rate": 5e-06, "loss": 0.6908, "num_input_tokens_seen": 104910960, "step": 1674 }, { "epoch": 5.570715474209651, "loss": 0.691491961479187, "loss_ce": 8.570987120037898e-05, "loss_iou": 0.25, "loss_num": 0.037841796875, "loss_xval": 0.69140625, "num_input_tokens_seen": 104910960, "step": 1674 }, { "epoch": 5.574043261231282, "grad_norm": 12.126524925231934, "learning_rate": 5e-06, "loss": 0.7477, "num_input_tokens_seen": 104974332, "step": 1675 }, { "epoch": 5.574043261231282, "loss": 0.45954495668411255, "loss_ce": 7.229376933537424e-05, "loss_iou": 0.1728515625, "loss_num": 0.022705078125, "loss_xval": 0.458984375, "num_input_tokens_seen": 104974332, "step": 1675 }, { "epoch": 5.577371048252912, "grad_norm": 21.586347579956055, "learning_rate": 5e-06, "loss": 0.6319, "num_input_tokens_seen": 105036216, "step": 1676 }, { "epoch": 5.577371048252912, "loss": 0.858528733253479, "loss_ce": 6.919073348399252e-05, "loss_iou": 0.3359375, "loss_num": 0.037353515625, "loss_xval": 0.859375, "num_input_tokens_seen": 105036216, "step": 1676 }, { "epoch": 5.580698835274543, "grad_norm": 6.689985275268555, "learning_rate": 5e-06, "loss": 0.5895, "num_input_tokens_seen": 105098888, "step": 1677 }, { "epoch": 5.580698835274543, "loss": 0.5708335041999817, "loss_ce": 0.0006430556531995535, "loss_iou": 0.158203125, "loss_num": 0.05078125, "loss_xval": 0.5703125, "num_input_tokens_seen": 105098888, "step": 1677 }, { "epoch": 5.584026622296173, "grad_norm": 23.635780334472656, "learning_rate": 5e-06, "loss": 0.6477, "num_input_tokens_seen": 105160692, "step": 1678 }, { "epoch": 5.584026622296173, "loss": 0.745747447013855, "loss_ce": 0.0005082354764454067, "loss_iou": 0.271484375, "loss_num": 0.0400390625, "loss_xval": 0.74609375, "num_input_tokens_seen": 105160692, "step": 1678 }, { "epoch": 5.587354409317804, "grad_norm": 17.890554428100586, "learning_rate": 5e-06, "loss": 0.8851, "num_input_tokens_seen": 105224932, "step": 1679 }, { "epoch": 5.587354409317804, "loss": 0.6944231986999512, "loss_ce": 0.0005755070014856756, "loss_iou": 0.2890625, "loss_num": 0.0233154296875, "loss_xval": 0.6953125, "num_input_tokens_seen": 105224932, "step": 1679 }, { "epoch": 5.590682196339435, "grad_norm": 36.01473617553711, "learning_rate": 5e-06, "loss": 0.5336, "num_input_tokens_seen": 105287516, "step": 1680 }, { "epoch": 5.590682196339435, "loss": 0.4492305517196655, "loss_ce": 1.179968148790067e-05, "loss_iou": 0.13671875, "loss_num": 0.03515625, "loss_xval": 0.44921875, "num_input_tokens_seen": 105287516, "step": 1680 }, { "epoch": 5.594009983361065, "grad_norm": 15.766006469726562, "learning_rate": 5e-06, "loss": 0.8177, "num_input_tokens_seen": 105349820, "step": 1681 }, { "epoch": 5.594009983361065, "loss": 0.9034368991851807, "loss_ce": 0.00011656155402306467, "loss_iou": 0.330078125, "loss_num": 0.048583984375, "loss_xval": 0.90234375, "num_input_tokens_seen": 105349820, "step": 1681 }, { "epoch": 5.597337770382696, "grad_norm": 10.683774948120117, "learning_rate": 5e-06, "loss": 0.596, "num_input_tokens_seen": 105411476, "step": 1682 }, { "epoch": 5.597337770382696, "loss": 0.8543940186500549, "loss_ce": 0.000634245399851352, "loss_iou": 0.291015625, "loss_num": 0.0546875, "loss_xval": 0.85546875, "num_input_tokens_seen": 105411476, "step": 1682 }, { "epoch": 5.6006655574043265, "grad_norm": 15.730971336364746, "learning_rate": 5e-06, "loss": 0.7665, "num_input_tokens_seen": 105473888, "step": 1683 }, { "epoch": 5.6006655574043265, "loss": 0.8576712012290955, "loss_ce": 5.173462795937667e-06, "loss_iou": 0.28515625, "loss_num": 0.057861328125, "loss_xval": 0.859375, "num_input_tokens_seen": 105473888, "step": 1683 }, { "epoch": 5.603993344425957, "grad_norm": 17.79537582397461, "learning_rate": 5e-06, "loss": 0.7979, "num_input_tokens_seen": 105537972, "step": 1684 }, { "epoch": 5.603993344425957, "loss": 0.8166719675064087, "loss_ce": 0.00026570551563054323, "loss_iou": 0.279296875, "loss_num": 0.051513671875, "loss_xval": 0.81640625, "num_input_tokens_seen": 105537972, "step": 1684 }, { "epoch": 5.607321131447588, "grad_norm": 15.09589672088623, "learning_rate": 5e-06, "loss": 0.6113, "num_input_tokens_seen": 105600116, "step": 1685 }, { "epoch": 5.607321131447588, "loss": 0.7013496160507202, "loss_ce": 5.5670123401796445e-05, "loss_iou": 0.1875, "loss_num": 0.0654296875, "loss_xval": 0.703125, "num_input_tokens_seen": 105600116, "step": 1685 }, { "epoch": 5.610648918469218, "grad_norm": 9.822373390197754, "learning_rate": 5e-06, "loss": 0.5208, "num_input_tokens_seen": 105661696, "step": 1686 }, { "epoch": 5.610648918469218, "loss": 0.4002895951271057, "loss_ce": 0.00044828990940004587, "loss_iou": 0.1044921875, "loss_num": 0.0380859375, "loss_xval": 0.400390625, "num_input_tokens_seen": 105661696, "step": 1686 }, { "epoch": 5.613976705490849, "grad_norm": 12.23663330078125, "learning_rate": 5e-06, "loss": 0.8393, "num_input_tokens_seen": 105724768, "step": 1687 }, { "epoch": 5.613976705490849, "loss": 0.684592604637146, "loss_ce": 2.223967749159783e-05, "loss_iou": 0.2216796875, "loss_num": 0.048095703125, "loss_xval": 0.68359375, "num_input_tokens_seen": 105724768, "step": 1687 }, { "epoch": 5.6173044925124795, "grad_norm": 38.26483917236328, "learning_rate": 5e-06, "loss": 0.5627, "num_input_tokens_seen": 105787560, "step": 1688 }, { "epoch": 5.6173044925124795, "loss": 0.44944727420806885, "loss_ce": 0.0004726658225990832, "loss_iou": 0.150390625, "loss_num": 0.0296630859375, "loss_xval": 0.44921875, "num_input_tokens_seen": 105787560, "step": 1688 }, { "epoch": 5.62063227953411, "grad_norm": 17.305635452270508, "learning_rate": 5e-06, "loss": 0.6798, "num_input_tokens_seen": 105851016, "step": 1689 }, { "epoch": 5.62063227953411, "loss": 0.553048849105835, "loss_ce": 9.341451004729606e-06, "loss_iou": 0.20703125, "loss_num": 0.02783203125, "loss_xval": 0.5546875, "num_input_tokens_seen": 105851016, "step": 1689 }, { "epoch": 5.623960066555741, "grad_norm": 19.972763061523438, "learning_rate": 5e-06, "loss": 0.7356, "num_input_tokens_seen": 105912112, "step": 1690 }, { "epoch": 5.623960066555741, "loss": 0.8830522298812866, "loss_ce": 0.00023976643569767475, "loss_iou": 0.310546875, "loss_num": 0.05224609375, "loss_xval": 0.8828125, "num_input_tokens_seen": 105912112, "step": 1690 }, { "epoch": 5.627287853577371, "grad_norm": 66.9946060180664, "learning_rate": 5e-06, "loss": 0.8263, "num_input_tokens_seen": 105975356, "step": 1691 }, { "epoch": 5.627287853577371, "loss": 0.643821120262146, "loss_ce": 2.2260259356698953e-05, "loss_iou": 0.2333984375, "loss_num": 0.035400390625, "loss_xval": 0.64453125, "num_input_tokens_seen": 105975356, "step": 1691 }, { "epoch": 5.630615640599002, "grad_norm": 33.13805389404297, "learning_rate": 5e-06, "loss": 0.6637, "num_input_tokens_seen": 106037320, "step": 1692 }, { "epoch": 5.630615640599002, "loss": 0.5694460272789001, "loss_ce": 0.00035422114888206124, "loss_iou": 0.2294921875, "loss_num": 0.022216796875, "loss_xval": 0.5703125, "num_input_tokens_seen": 106037320, "step": 1692 }, { "epoch": 5.633943427620633, "grad_norm": 15.999832153320312, "learning_rate": 5e-06, "loss": 0.7618, "num_input_tokens_seen": 106100968, "step": 1693 }, { "epoch": 5.633943427620633, "loss": 0.5870428085327148, "loss_ce": 0.0003728655865415931, "loss_iou": 0.208984375, "loss_num": 0.03369140625, "loss_xval": 0.5859375, "num_input_tokens_seen": 106100968, "step": 1693 }, { "epoch": 5.637271214642263, "grad_norm": 22.809717178344727, "learning_rate": 5e-06, "loss": 0.5141, "num_input_tokens_seen": 106163984, "step": 1694 }, { "epoch": 5.637271214642263, "loss": 0.5070875883102417, "loss_ce": 7.488038590963697e-06, "loss_iou": 0.1787109375, "loss_num": 0.0301513671875, "loss_xval": 0.5078125, "num_input_tokens_seen": 106163984, "step": 1694 }, { "epoch": 5.640599001663894, "grad_norm": 10.32789134979248, "learning_rate": 5e-06, "loss": 0.4488, "num_input_tokens_seen": 106225564, "step": 1695 }, { "epoch": 5.640599001663894, "loss": 0.332084059715271, "loss_ce": 7.034345344436588e-06, "loss_iou": 0.1103515625, "loss_num": 0.022216796875, "loss_xval": 0.33203125, "num_input_tokens_seen": 106225564, "step": 1695 }, { "epoch": 5.643926788685524, "grad_norm": 7.523070335388184, "learning_rate": 5e-06, "loss": 0.6172, "num_input_tokens_seen": 106287900, "step": 1696 }, { "epoch": 5.643926788685524, "loss": 0.775996208190918, "loss_ce": 0.00023933117336127907, "loss_iou": 0.27734375, "loss_num": 0.04443359375, "loss_xval": 0.77734375, "num_input_tokens_seen": 106287900, "step": 1696 }, { "epoch": 5.647254575707155, "grad_norm": 15.663951873779297, "learning_rate": 5e-06, "loss": 0.4675, "num_input_tokens_seen": 106348884, "step": 1697 }, { "epoch": 5.647254575707155, "loss": 0.4975832402706146, "loss_ce": 2.462265911162831e-05, "loss_iou": 0.169921875, "loss_num": 0.031494140625, "loss_xval": 0.498046875, "num_input_tokens_seen": 106348884, "step": 1697 }, { "epoch": 5.650582362728786, "grad_norm": 9.897237777709961, "learning_rate": 5e-06, "loss": 0.7378, "num_input_tokens_seen": 106411368, "step": 1698 }, { "epoch": 5.650582362728786, "loss": 0.7313656806945801, "loss_ce": 0.0006527729565277696, "loss_iou": 0.244140625, "loss_num": 0.04833984375, "loss_xval": 0.73046875, "num_input_tokens_seen": 106411368, "step": 1698 }, { "epoch": 5.653910149750416, "grad_norm": 27.847606658935547, "learning_rate": 5e-06, "loss": 0.7811, "num_input_tokens_seen": 106473388, "step": 1699 }, { "epoch": 5.653910149750416, "loss": 1.0008139610290527, "loss_ce": 0.0003256350464653224, "loss_iou": 0.32421875, "loss_num": 0.07080078125, "loss_xval": 1.0, "num_input_tokens_seen": 106473388, "step": 1699 }, { "epoch": 5.657237936772047, "grad_norm": 11.24642276763916, "learning_rate": 5e-06, "loss": 0.8014, "num_input_tokens_seen": 106537688, "step": 1700 }, { "epoch": 5.657237936772047, "loss": 0.9711390137672424, "loss_ce": 0.0006800297414883971, "loss_iou": 0.326171875, "loss_num": 0.0634765625, "loss_xval": 0.96875, "num_input_tokens_seen": 106537688, "step": 1700 }, { "epoch": 5.6605657237936775, "grad_norm": 14.969793319702148, "learning_rate": 5e-06, "loss": 0.7234, "num_input_tokens_seen": 106600456, "step": 1701 }, { "epoch": 5.6605657237936775, "loss": 0.5358263254165649, "loss_ce": 0.000670083099976182, "loss_iou": 0.1923828125, "loss_num": 0.030029296875, "loss_xval": 0.53515625, "num_input_tokens_seen": 106600456, "step": 1701 }, { "epoch": 5.663893510815308, "grad_norm": 12.321464538574219, "learning_rate": 5e-06, "loss": 0.5965, "num_input_tokens_seen": 106662656, "step": 1702 }, { "epoch": 5.663893510815308, "loss": 0.7050811052322388, "loss_ce": 0.000857477483805269, "loss_iou": 0.205078125, "loss_num": 0.058837890625, "loss_xval": 0.703125, "num_input_tokens_seen": 106662656, "step": 1702 }, { "epoch": 5.667221297836939, "grad_norm": 17.309473037719727, "learning_rate": 5e-06, "loss": 0.8267, "num_input_tokens_seen": 106726504, "step": 1703 }, { "epoch": 5.667221297836939, "loss": 0.8586465120315552, "loss_ce": 3.938740519515704e-06, "loss_iou": 0.330078125, "loss_num": 0.039306640625, "loss_xval": 0.859375, "num_input_tokens_seen": 106726504, "step": 1703 }, { "epoch": 5.670549084858569, "grad_norm": 21.560821533203125, "learning_rate": 5e-06, "loss": 0.6404, "num_input_tokens_seen": 106789148, "step": 1704 }, { "epoch": 5.670549084858569, "loss": 0.4272652268409729, "loss_ce": 1.9131181034026667e-05, "loss_iou": 0.1396484375, "loss_num": 0.0294189453125, "loss_xval": 0.427734375, "num_input_tokens_seen": 106789148, "step": 1704 }, { "epoch": 5.6738768718802, "grad_norm": 68.77386474609375, "learning_rate": 5e-06, "loss": 0.7511, "num_input_tokens_seen": 106851232, "step": 1705 }, { "epoch": 5.6738768718802, "loss": 0.5564388632774353, "loss_ce": 4.237954271957278e-05, "loss_iou": 0.166015625, "loss_num": 0.044921875, "loss_xval": 0.5546875, "num_input_tokens_seen": 106851232, "step": 1705 }, { "epoch": 5.677204658901831, "grad_norm": 9.309255599975586, "learning_rate": 5e-06, "loss": 0.6206, "num_input_tokens_seen": 106914752, "step": 1706 }, { "epoch": 5.677204658901831, "loss": 0.5360674858093262, "loss_ce": 5.677162334905006e-05, "loss_iou": 0.212890625, "loss_num": 0.022216796875, "loss_xval": 0.53515625, "num_input_tokens_seen": 106914752, "step": 1706 }, { "epoch": 5.680532445923461, "grad_norm": 9.004875183105469, "learning_rate": 5e-06, "loss": 0.5238, "num_input_tokens_seen": 106976952, "step": 1707 }, { "epoch": 5.680532445923461, "loss": 0.45058244466781616, "loss_ce": 0.00014298340829554945, "loss_iou": 0.1279296875, "loss_num": 0.038818359375, "loss_xval": 0.451171875, "num_input_tokens_seen": 106976952, "step": 1707 }, { "epoch": 5.683860232945092, "grad_norm": 12.503425598144531, "learning_rate": 5e-06, "loss": 0.6518, "num_input_tokens_seen": 107039528, "step": 1708 }, { "epoch": 5.683860232945092, "loss": 0.7054813504219055, "loss_ce": 0.000281153799733147, "loss_iou": 0.2236328125, "loss_num": 0.0517578125, "loss_xval": 0.70703125, "num_input_tokens_seen": 107039528, "step": 1708 }, { "epoch": 5.687188019966722, "grad_norm": 123.16822052001953, "learning_rate": 5e-06, "loss": 0.6203, "num_input_tokens_seen": 107103448, "step": 1709 }, { "epoch": 5.687188019966722, "loss": 0.49697205424308777, "loss_ce": 0.00014590048522222787, "loss_iou": 0.1650390625, "loss_num": 0.033203125, "loss_xval": 0.49609375, "num_input_tokens_seen": 107103448, "step": 1709 }, { "epoch": 5.690515806988353, "grad_norm": 19.90471839904785, "learning_rate": 5e-06, "loss": 0.6721, "num_input_tokens_seen": 107165844, "step": 1710 }, { "epoch": 5.690515806988353, "loss": 0.703316330909729, "loss_ce": 0.00019130959117319435, "loss_iou": 0.2138671875, "loss_num": 0.05517578125, "loss_xval": 0.703125, "num_input_tokens_seen": 107165844, "step": 1710 }, { "epoch": 5.693843594009984, "grad_norm": 26.4517765045166, "learning_rate": 5e-06, "loss": 0.7115, "num_input_tokens_seen": 107228880, "step": 1711 }, { "epoch": 5.693843594009984, "loss": 0.5219588279724121, "loss_ce": 0.0009016690892167389, "loss_iou": 0.1787109375, "loss_num": 0.032958984375, "loss_xval": 0.51953125, "num_input_tokens_seen": 107228880, "step": 1711 }, { "epoch": 5.697171381031614, "grad_norm": 24.016637802124023, "learning_rate": 5e-06, "loss": 0.6032, "num_input_tokens_seen": 107291652, "step": 1712 }, { "epoch": 5.697171381031614, "loss": 0.6287029385566711, "loss_ce": 0.0006511926185339689, "loss_iou": 0.1416015625, "loss_num": 0.06884765625, "loss_xval": 0.62890625, "num_input_tokens_seen": 107291652, "step": 1712 }, { "epoch": 5.700499168053245, "grad_norm": 12.63536548614502, "learning_rate": 5e-06, "loss": 0.5321, "num_input_tokens_seen": 107353984, "step": 1713 }, { "epoch": 5.700499168053245, "loss": 0.6348440647125244, "loss_ce": 0.001055013621225953, "loss_iou": 0.2236328125, "loss_num": 0.037109375, "loss_xval": 0.6328125, "num_input_tokens_seen": 107353984, "step": 1713 }, { "epoch": 5.7038269550748755, "grad_norm": 28.98735237121582, "learning_rate": 5e-06, "loss": 0.8102, "num_input_tokens_seen": 107417396, "step": 1714 }, { "epoch": 5.7038269550748755, "loss": 0.7450417280197144, "loss_ce": 0.00016867804515641183, "loss_iou": 0.265625, "loss_num": 0.042724609375, "loss_xval": 0.74609375, "num_input_tokens_seen": 107417396, "step": 1714 }, { "epoch": 5.707154742096506, "grad_norm": 12.304791450500488, "learning_rate": 5e-06, "loss": 0.4742, "num_input_tokens_seen": 107480376, "step": 1715 }, { "epoch": 5.707154742096506, "loss": 0.4631630778312683, "loss_ce": 8.936785161495209e-05, "loss_iou": 0.1513671875, "loss_num": 0.0322265625, "loss_xval": 0.462890625, "num_input_tokens_seen": 107480376, "step": 1715 }, { "epoch": 5.710482529118137, "grad_norm": 14.445513725280762, "learning_rate": 5e-06, "loss": 0.8061, "num_input_tokens_seen": 107544188, "step": 1716 }, { "epoch": 5.710482529118137, "loss": 0.8209518194198608, "loss_ce": 0.0010055896127596498, "loss_iou": 0.2470703125, "loss_num": 0.06494140625, "loss_xval": 0.8203125, "num_input_tokens_seen": 107544188, "step": 1716 }, { "epoch": 5.713810316139767, "grad_norm": 8.823054313659668, "learning_rate": 5e-06, "loss": 0.4795, "num_input_tokens_seen": 107605148, "step": 1717 }, { "epoch": 5.713810316139767, "loss": 0.4528508484363556, "loss_ce": 0.0004582681867759675, "loss_iou": 0.078125, "loss_num": 0.059326171875, "loss_xval": 0.453125, "num_input_tokens_seen": 107605148, "step": 1717 }, { "epoch": 5.717138103161398, "grad_norm": 16.8961181640625, "learning_rate": 5e-06, "loss": 0.4838, "num_input_tokens_seen": 107668080, "step": 1718 }, { "epoch": 5.717138103161398, "loss": 0.4450841546058655, "loss_ce": 0.0009313120390288532, "loss_iou": 0.15234375, "loss_num": 0.0281982421875, "loss_xval": 0.443359375, "num_input_tokens_seen": 107668080, "step": 1718 }, { "epoch": 5.7204658901830285, "grad_norm": 9.961505889892578, "learning_rate": 5e-06, "loss": 0.341, "num_input_tokens_seen": 107729940, "step": 1719 }, { "epoch": 5.7204658901830285, "loss": 0.3053891658782959, "loss_ce": 9.12985997274518e-05, "loss_iou": 0.078125, "loss_num": 0.0299072265625, "loss_xval": 0.3046875, "num_input_tokens_seen": 107729940, "step": 1719 }, { "epoch": 5.723793677204659, "grad_norm": 11.72249984741211, "learning_rate": 5e-06, "loss": 0.6196, "num_input_tokens_seen": 107791916, "step": 1720 }, { "epoch": 5.723793677204659, "loss": 0.8364963531494141, "loss_ce": 0.0005588560015894473, "loss_iou": 0.314453125, "loss_num": 0.04150390625, "loss_xval": 0.8359375, "num_input_tokens_seen": 107791916, "step": 1720 }, { "epoch": 5.72712146422629, "grad_norm": 22.064348220825195, "learning_rate": 5e-06, "loss": 0.6433, "num_input_tokens_seen": 107855036, "step": 1721 }, { "epoch": 5.72712146422629, "loss": 0.5307466983795166, "loss_ce": 0.0015108881052583456, "loss_iou": 0.13671875, "loss_num": 0.05126953125, "loss_xval": 0.52734375, "num_input_tokens_seen": 107855036, "step": 1721 }, { "epoch": 5.73044925124792, "grad_norm": 9.683394432067871, "learning_rate": 5e-06, "loss": 0.6907, "num_input_tokens_seen": 107918084, "step": 1722 }, { "epoch": 5.73044925124792, "loss": 0.5834569931030273, "loss_ce": 0.00038815615698695183, "loss_iou": 0.1796875, "loss_num": 0.044921875, "loss_xval": 0.58203125, "num_input_tokens_seen": 107918084, "step": 1722 }, { "epoch": 5.733777038269551, "grad_norm": 6.485795974731445, "learning_rate": 5e-06, "loss": 0.4933, "num_input_tokens_seen": 107980216, "step": 1723 }, { "epoch": 5.733777038269551, "loss": 0.6450976133346558, "loss_ce": 0.0006884537287987769, "loss_iou": 0.1630859375, "loss_num": 0.06396484375, "loss_xval": 0.64453125, "num_input_tokens_seen": 107980216, "step": 1723 }, { "epoch": 5.737104825291182, "grad_norm": 17.993772506713867, "learning_rate": 5e-06, "loss": 0.7217, "num_input_tokens_seen": 108041460, "step": 1724 }, { "epoch": 5.737104825291182, "loss": 0.8249725103378296, "loss_ce": 8.237032307079062e-05, "loss_iou": 0.298828125, "loss_num": 0.04541015625, "loss_xval": 0.82421875, "num_input_tokens_seen": 108041460, "step": 1724 }, { "epoch": 5.740432612312812, "grad_norm": 23.316713333129883, "learning_rate": 5e-06, "loss": 0.624, "num_input_tokens_seen": 108104100, "step": 1725 }, { "epoch": 5.740432612312812, "loss": 0.7806700468063354, "loss_ce": 0.002044552704319358, "loss_iou": 0.2314453125, "loss_num": 0.06298828125, "loss_xval": 0.77734375, "num_input_tokens_seen": 108104100, "step": 1725 }, { "epoch": 5.743760399334443, "grad_norm": 14.220916748046875, "learning_rate": 5e-06, "loss": 0.8526, "num_input_tokens_seen": 108166016, "step": 1726 }, { "epoch": 5.743760399334443, "loss": 0.9530130624771118, "loss_ce": 1.0050905984826386e-05, "loss_iou": 0.31640625, "loss_num": 0.06396484375, "loss_xval": 0.953125, "num_input_tokens_seen": 108166016, "step": 1726 }, { "epoch": 5.747088186356073, "grad_norm": 19.730939865112305, "learning_rate": 5e-06, "loss": 0.7547, "num_input_tokens_seen": 108228780, "step": 1727 }, { "epoch": 5.747088186356073, "loss": 1.039328932762146, "loss_ce": 0.0002664237981662154, "loss_iou": 0.322265625, "loss_num": 0.0791015625, "loss_xval": 1.0390625, "num_input_tokens_seen": 108228780, "step": 1727 }, { "epoch": 5.750415973377704, "grad_norm": 12.41170597076416, "learning_rate": 5e-06, "loss": 0.7916, "num_input_tokens_seen": 108292028, "step": 1728 }, { "epoch": 5.750415973377704, "loss": 0.9140459299087524, "loss_ce": 0.0005938184331171215, "loss_iou": 0.287109375, "loss_num": 0.0673828125, "loss_xval": 0.9140625, "num_input_tokens_seen": 108292028, "step": 1728 }, { "epoch": 5.753743760399335, "grad_norm": 13.194222450256348, "learning_rate": 5e-06, "loss": 0.8602, "num_input_tokens_seen": 108355096, "step": 1729 }, { "epoch": 5.753743760399335, "loss": 0.9234409928321838, "loss_ce": 0.00010111027950188145, "loss_iou": 0.357421875, "loss_num": 0.041748046875, "loss_xval": 0.921875, "num_input_tokens_seen": 108355096, "step": 1729 }, { "epoch": 5.757071547420965, "grad_norm": 20.04388427734375, "learning_rate": 5e-06, "loss": 0.7181, "num_input_tokens_seen": 108417048, "step": 1730 }, { "epoch": 5.757071547420965, "loss": 0.9073438048362732, "loss_ce": 0.0006054791738279164, "loss_iou": 0.330078125, "loss_num": 0.04931640625, "loss_xval": 0.90625, "num_input_tokens_seen": 108417048, "step": 1730 }, { "epoch": 5.760399334442596, "grad_norm": 17.463380813598633, "learning_rate": 5e-06, "loss": 0.741, "num_input_tokens_seen": 108480692, "step": 1731 }, { "epoch": 5.760399334442596, "loss": 0.6669446229934692, "loss_ce": 7.449155964422971e-05, "loss_iou": 0.224609375, "loss_num": 0.043701171875, "loss_xval": 0.66796875, "num_input_tokens_seen": 108480692, "step": 1731 }, { "epoch": 5.7637271214642265, "grad_norm": 22.218618392944336, "learning_rate": 5e-06, "loss": 0.5948, "num_input_tokens_seen": 108542816, "step": 1732 }, { "epoch": 5.7637271214642265, "loss": 0.4774358868598938, "loss_ce": 0.000995467184111476, "loss_iou": 0.150390625, "loss_num": 0.03515625, "loss_xval": 0.4765625, "num_input_tokens_seen": 108542816, "step": 1732 }, { "epoch": 5.767054908485857, "grad_norm": 12.377209663391113, "learning_rate": 5e-06, "loss": 0.4993, "num_input_tokens_seen": 108606396, "step": 1733 }, { "epoch": 5.767054908485857, "loss": 0.5402237176895142, "loss_ce": 0.00019988913845736533, "loss_iou": 0.1767578125, "loss_num": 0.037353515625, "loss_xval": 0.5390625, "num_input_tokens_seen": 108606396, "step": 1733 }, { "epoch": 5.770382695507488, "grad_norm": 9.325462341308594, "learning_rate": 5e-06, "loss": 0.8247, "num_input_tokens_seen": 108668468, "step": 1734 }, { "epoch": 5.770382695507488, "loss": 0.9547851085662842, "loss_ce": 0.00397943751886487, "loss_iou": 0.302734375, "loss_num": 0.0693359375, "loss_xval": 0.94921875, "num_input_tokens_seen": 108668468, "step": 1734 }, { "epoch": 5.773710482529118, "grad_norm": 12.919450759887695, "learning_rate": 5e-06, "loss": 0.6592, "num_input_tokens_seen": 108731260, "step": 1735 }, { "epoch": 5.773710482529118, "loss": 0.5413807034492493, "loss_ce": 0.0009144245414063334, "loss_iou": 0.1748046875, "loss_num": 0.0380859375, "loss_xval": 0.5390625, "num_input_tokens_seen": 108731260, "step": 1735 }, { "epoch": 5.777038269550749, "grad_norm": 11.477002143859863, "learning_rate": 5e-06, "loss": 0.8761, "num_input_tokens_seen": 108795324, "step": 1736 }, { "epoch": 5.777038269550749, "loss": 0.9495025873184204, "loss_ce": 0.0010161716490983963, "loss_iou": 0.306640625, "loss_num": 0.06640625, "loss_xval": 0.94921875, "num_input_tokens_seen": 108795324, "step": 1736 }, { "epoch": 5.78036605657238, "grad_norm": 9.099837303161621, "learning_rate": 5e-06, "loss": 0.7722, "num_input_tokens_seen": 108857560, "step": 1737 }, { "epoch": 5.78036605657238, "loss": 0.7459558248519897, "loss_ce": 0.000472470186650753, "loss_iou": 0.24609375, "loss_num": 0.05078125, "loss_xval": 0.74609375, "num_input_tokens_seen": 108857560, "step": 1737 }, { "epoch": 5.78369384359401, "grad_norm": 14.444173812866211, "learning_rate": 5e-06, "loss": 0.4758, "num_input_tokens_seen": 108918780, "step": 1738 }, { "epoch": 5.78369384359401, "loss": 0.4299461543560028, "loss_ce": 1.4529123291140422e-05, "loss_iou": 0.12353515625, "loss_num": 0.03662109375, "loss_xval": 0.4296875, "num_input_tokens_seen": 108918780, "step": 1738 }, { "epoch": 5.787021630615641, "grad_norm": 17.906532287597656, "learning_rate": 5e-06, "loss": 0.6279, "num_input_tokens_seen": 108980980, "step": 1739 }, { "epoch": 5.787021630615641, "loss": 0.6438461542129517, "loss_ce": 0.0005356057081371546, "loss_iou": 0.2431640625, "loss_num": 0.031494140625, "loss_xval": 0.64453125, "num_input_tokens_seen": 108980980, "step": 1739 }, { "epoch": 5.790349417637271, "grad_norm": 13.573742866516113, "learning_rate": 5e-06, "loss": 0.7242, "num_input_tokens_seen": 109043940, "step": 1740 }, { "epoch": 5.790349417637271, "loss": 0.9529647827148438, "loss_ce": 0.0005722053465433419, "loss_iou": 0.333984375, "loss_num": 0.056884765625, "loss_xval": 0.953125, "num_input_tokens_seen": 109043940, "step": 1740 }, { "epoch": 5.793677204658902, "grad_norm": 13.186030387878418, "learning_rate": 5e-06, "loss": 0.6727, "num_input_tokens_seen": 109106144, "step": 1741 }, { "epoch": 5.793677204658902, "loss": 0.6280597448348999, "loss_ce": 0.000374168943380937, "loss_iou": 0.2314453125, "loss_num": 0.033203125, "loss_xval": 0.62890625, "num_input_tokens_seen": 109106144, "step": 1741 }, { "epoch": 5.797004991680533, "grad_norm": 11.846780776977539, "learning_rate": 5e-06, "loss": 0.5714, "num_input_tokens_seen": 109169260, "step": 1742 }, { "epoch": 5.797004991680533, "loss": 0.6322175860404968, "loss_ce": 1.547130523249507e-05, "loss_iou": 0.20703125, "loss_num": 0.04345703125, "loss_xval": 0.6328125, "num_input_tokens_seen": 109169260, "step": 1742 }, { "epoch": 5.800332778702163, "grad_norm": 17.20931053161621, "learning_rate": 5e-06, "loss": 0.6911, "num_input_tokens_seen": 109232900, "step": 1743 }, { "epoch": 5.800332778702163, "loss": 0.5807763338088989, "loss_ce": 0.00020993230282329023, "loss_iou": 0.150390625, "loss_num": 0.05615234375, "loss_xval": 0.58203125, "num_input_tokens_seen": 109232900, "step": 1743 }, { "epoch": 5.803660565723794, "grad_norm": 17.745994567871094, "learning_rate": 5e-06, "loss": 0.7031, "num_input_tokens_seen": 109295748, "step": 1744 }, { "epoch": 5.803660565723794, "loss": 0.8015212416648865, "loss_ce": 7.5699854278354906e-06, "loss_iou": 0.29296875, "loss_num": 0.04296875, "loss_xval": 0.80078125, "num_input_tokens_seen": 109295748, "step": 1744 }, { "epoch": 5.8069883527454245, "grad_norm": 8.905264854431152, "learning_rate": 5e-06, "loss": 0.4016, "num_input_tokens_seen": 109358312, "step": 1745 }, { "epoch": 5.8069883527454245, "loss": 0.39209282398223877, "loss_ce": 2.9924158297944814e-06, "loss_iou": 0.109375, "loss_num": 0.03466796875, "loss_xval": 0.392578125, "num_input_tokens_seen": 109358312, "step": 1745 }, { "epoch": 5.810316139767055, "grad_norm": 5.492832660675049, "learning_rate": 5e-06, "loss": 0.593, "num_input_tokens_seen": 109421796, "step": 1746 }, { "epoch": 5.810316139767055, "loss": 0.4544283151626587, "loss_ce": 0.00014364512753672898, "loss_iou": 0.150390625, "loss_num": 0.0303955078125, "loss_xval": 0.455078125, "num_input_tokens_seen": 109421796, "step": 1746 }, { "epoch": 5.813643926788686, "grad_norm": 13.455323219299316, "learning_rate": 5e-06, "loss": 0.8716, "num_input_tokens_seen": 109485112, "step": 1747 }, { "epoch": 5.813643926788686, "loss": 0.9304600954055786, "loss_ce": 0.0002843089459929615, "loss_iou": 0.337890625, "loss_num": 0.05078125, "loss_xval": 0.9296875, "num_input_tokens_seen": 109485112, "step": 1747 }, { "epoch": 5.816971713810316, "grad_norm": 17.337217330932617, "learning_rate": 5e-06, "loss": 0.6384, "num_input_tokens_seen": 109547828, "step": 1748 }, { "epoch": 5.816971713810316, "loss": 0.8259046673774719, "loss_ce": 0.00046522487537004054, "loss_iou": 0.298828125, "loss_num": 0.045654296875, "loss_xval": 0.82421875, "num_input_tokens_seen": 109547828, "step": 1748 }, { "epoch": 5.820299500831947, "grad_norm": 8.980031967163086, "learning_rate": 5e-06, "loss": 0.5732, "num_input_tokens_seen": 109610380, "step": 1749 }, { "epoch": 5.820299500831947, "loss": 0.4640018939971924, "loss_ce": 1.261913166672457e-05, "loss_iou": 0.15625, "loss_num": 0.0302734375, "loss_xval": 0.46484375, "num_input_tokens_seen": 109610380, "step": 1749 }, { "epoch": 5.8236272878535775, "grad_norm": 13.945286750793457, "learning_rate": 5e-06, "loss": 0.5297, "num_input_tokens_seen": 109672416, "step": 1750 }, { "epoch": 5.8236272878535775, "eval_seeclick_CIoU": 0.10236945003271103, "eval_seeclick_GIoU": 0.11712978780269623, "eval_seeclick_IoU": 0.2036900669336319, "eval_seeclick_MAE_all": 0.16942255944013596, "eval_seeclick_MAE_h": 0.049304405227303505, "eval_seeclick_MAE_w": 0.12896203249692917, "eval_seeclick_MAE_x_boxes": 0.23738989233970642, "eval_seeclick_MAE_y_boxes": 0.1408754587173462, "eval_seeclick_NUM_probability": 0.9998235106468201, "eval_seeclick_inside_bbox": 0.20937500149011612, "eval_seeclick_loss": 2.7325868606567383, "eval_seeclick_loss_ce": 0.11027439311146736, "eval_seeclick_loss_iou": 0.889892578125, "eval_seeclick_loss_num": 0.16834259033203125, "eval_seeclick_loss_xval": 2.6220703125, "eval_seeclick_runtime": 67.9027, "eval_seeclick_samples_per_second": 0.692, "eval_seeclick_steps_per_second": 0.029, "num_input_tokens_seen": 109672416, "step": 1750 }, { "epoch": 5.8236272878535775, "eval_icons_CIoU": 0.04104041517712176, "eval_icons_GIoU": 0.1604958102107048, "eval_icons_IoU": 0.19673043489456177, "eval_icons_MAE_all": 0.14254845678806305, "eval_icons_MAE_h": 0.05420855712145567, "eval_icons_MAE_w": 0.15784180164337158, "eval_icons_MAE_x_boxes": 0.15393128246068954, "eval_icons_MAE_y_boxes": 0.047592333517968655, "eval_icons_NUM_probability": 0.9999956786632538, "eval_icons_inside_bbox": 0.3420138955116272, "eval_icons_loss": 2.3909385204315186, "eval_icons_loss_ce": 9.011300790007226e-07, "eval_icons_loss_iou": 0.824462890625, "eval_icons_loss_num": 0.14644622802734375, "eval_icons_loss_xval": 2.3818359375, "eval_icons_runtime": 68.4022, "eval_icons_samples_per_second": 0.731, "eval_icons_steps_per_second": 0.029, "num_input_tokens_seen": 109672416, "step": 1750 }, { "epoch": 5.8236272878535775, "eval_screenspot_CIoU": 0.06936451761672895, "eval_screenspot_GIoU": 0.13237376511096954, "eval_screenspot_IoU": 0.21043485403060913, "eval_screenspot_MAE_all": 0.18726058304309845, "eval_screenspot_MAE_h": 0.07297260562578838, "eval_screenspot_MAE_w": 0.14615009228388467, "eval_screenspot_MAE_x_boxes": 0.25001437962055206, "eval_screenspot_MAE_y_boxes": 0.13508301973342896, "eval_screenspot_NUM_probability": 0.9999534090360006, "eval_screenspot_inside_bbox": 0.3625000019868215, "eval_screenspot_loss": 2.6945841312408447, "eval_screenspot_loss_ce": 5.838420596167756e-05, "eval_screenspot_loss_iou": 0.88134765625, "eval_screenspot_loss_num": 0.194793701171875, "eval_screenspot_loss_xval": 2.7360026041666665, "eval_screenspot_runtime": 119.8843, "eval_screenspot_samples_per_second": 0.742, "eval_screenspot_steps_per_second": 0.025, "num_input_tokens_seen": 109672416, "step": 1750 }, { "epoch": 5.8236272878535775, "eval_compot_CIoU": 0.003425128757953644, "eval_compot_GIoU": 0.11101753637194633, "eval_compot_IoU": 0.17780261486768723, "eval_compot_MAE_all": 0.2019421085715294, "eval_compot_MAE_h": 0.07107486762106419, "eval_compot_MAE_w": 0.2705545723438263, "eval_compot_MAE_x_boxes": 0.20419960469007492, "eval_compot_MAE_y_boxes": 0.08390221372246742, "eval_compot_NUM_probability": 0.9999839067459106, "eval_compot_inside_bbox": 0.3107638955116272, "eval_compot_loss": 2.85368275642395, "eval_compot_loss_ce": 0.004224188975058496, "eval_compot_loss_iou": 0.915283203125, "eval_compot_loss_num": 0.2141571044921875, "eval_compot_loss_xval": 2.9033203125, "eval_compot_runtime": 72.3216, "eval_compot_samples_per_second": 0.691, "eval_compot_steps_per_second": 0.028, "num_input_tokens_seen": 109672416, "step": 1750 }, { "epoch": 5.8236272878535775, "eval_custom_ui_MAE_all": 0.08030644804239273, "eval_custom_ui_MAE_x": 0.07421864569187164, "eval_custom_ui_MAE_y": 0.08639425411820412, "eval_custom_ui_NUM_probability": 0.999993622303009, "eval_custom_ui_loss": 0.3886740803718567, "eval_custom_ui_loss_ce": 2.7836218805532553e-06, "eval_custom_ui_loss_num": 0.074920654296875, "eval_custom_ui_loss_xval": 0.3746337890625, "eval_custom_ui_runtime": 53.634, "eval_custom_ui_samples_per_second": 0.932, "eval_custom_ui_steps_per_second": 0.037, "num_input_tokens_seen": 109672416, "step": 1750 }, { "epoch": 5.8236272878535775, "loss": 0.3675568401813507, "loss_ce": 3.1467793633055408e-06, "loss_iou": 0.0, "loss_num": 0.0732421875, "loss_xval": 0.3671875, "num_input_tokens_seen": 109672416, "step": 1750 }, { "epoch": 5.826955074875208, "grad_norm": 9.24660873413086, "learning_rate": 5e-06, "loss": 0.8293, "num_input_tokens_seen": 109735632, "step": 1751 }, { "epoch": 5.826955074875208, "loss": 0.6466149091720581, "loss_ce": 0.0006188148399814963, "loss_iou": 0.2197265625, "loss_num": 0.04150390625, "loss_xval": 0.64453125, "num_input_tokens_seen": 109735632, "step": 1751 }, { "epoch": 5.830282861896839, "grad_norm": 8.810318946838379, "learning_rate": 5e-06, "loss": 0.7299, "num_input_tokens_seen": 109799360, "step": 1752 }, { "epoch": 5.830282861896839, "loss": 0.7848711013793945, "loss_ce": 0.0011797224869951606, "loss_iou": 0.3125, "loss_num": 0.031494140625, "loss_xval": 0.78515625, "num_input_tokens_seen": 109799360, "step": 1752 }, { "epoch": 5.833610648918469, "grad_norm": 10.353952407836914, "learning_rate": 5e-06, "loss": 0.6097, "num_input_tokens_seen": 109860312, "step": 1753 }, { "epoch": 5.833610648918469, "loss": 0.6687078475952148, "loss_ce": 6.68507709633559e-06, "loss_iou": 0.193359375, "loss_num": 0.056396484375, "loss_xval": 0.66796875, "num_input_tokens_seen": 109860312, "step": 1753 }, { "epoch": 5.8369384359401, "grad_norm": 37.4395866394043, "learning_rate": 5e-06, "loss": 0.6346, "num_input_tokens_seen": 109921664, "step": 1754 }, { "epoch": 5.8369384359401, "loss": 0.6058779358863831, "loss_ce": 0.0012636694591492414, "loss_iou": 0.2265625, "loss_num": 0.0302734375, "loss_xval": 0.60546875, "num_input_tokens_seen": 109921664, "step": 1754 }, { "epoch": 5.840266222961731, "grad_norm": 18.188020706176758, "learning_rate": 5e-06, "loss": 0.5968, "num_input_tokens_seen": 109983676, "step": 1755 }, { "epoch": 5.840266222961731, "loss": 0.5723915100097656, "loss_ce": 0.003986367955803871, "loss_iou": 0.1904296875, "loss_num": 0.037841796875, "loss_xval": 0.5703125, "num_input_tokens_seen": 109983676, "step": 1755 }, { "epoch": 5.843594009983361, "grad_norm": 17.082475662231445, "learning_rate": 5e-06, "loss": 0.6797, "num_input_tokens_seen": 110046380, "step": 1756 }, { "epoch": 5.843594009983361, "loss": 0.4576748311519623, "loss_ce": 0.0001553079637233168, "loss_iou": 0.158203125, "loss_num": 0.0281982421875, "loss_xval": 0.45703125, "num_input_tokens_seen": 110046380, "step": 1756 }, { "epoch": 5.846921797004992, "grad_norm": 9.511809349060059, "learning_rate": 5e-06, "loss": 0.525, "num_input_tokens_seen": 110109532, "step": 1757 }, { "epoch": 5.846921797004992, "loss": 0.4188133776187897, "loss_ce": 0.00011222133616684005, "loss_iou": 0.10888671875, "loss_num": 0.040283203125, "loss_xval": 0.41796875, "num_input_tokens_seen": 110109532, "step": 1757 }, { "epoch": 5.850249584026622, "grad_norm": 11.285202026367188, "learning_rate": 5e-06, "loss": 0.5777, "num_input_tokens_seen": 110172300, "step": 1758 }, { "epoch": 5.850249584026622, "loss": 0.657119631767273, "loss_ce": 1.5172716302913614e-05, "loss_iou": 0.21875, "loss_num": 0.0439453125, "loss_xval": 0.65625, "num_input_tokens_seen": 110172300, "step": 1758 }, { "epoch": 5.853577371048253, "grad_norm": 13.522523880004883, "learning_rate": 5e-06, "loss": 0.4297, "num_input_tokens_seen": 110235616, "step": 1759 }, { "epoch": 5.853577371048253, "loss": 0.5291931629180908, "loss_ce": 1.8332864783587866e-05, "loss_iou": 0.1806640625, "loss_num": 0.03369140625, "loss_xval": 0.52734375, "num_input_tokens_seen": 110235616, "step": 1759 }, { "epoch": 5.856905158069884, "grad_norm": 10.221686363220215, "learning_rate": 5e-06, "loss": 0.3553, "num_input_tokens_seen": 110296840, "step": 1760 }, { "epoch": 5.856905158069884, "loss": 0.4893026351928711, "loss_ce": 0.00016688673349563032, "loss_iou": 0.123046875, "loss_num": 0.048583984375, "loss_xval": 0.48828125, "num_input_tokens_seen": 110296840, "step": 1760 }, { "epoch": 5.860232945091514, "grad_norm": 21.090612411499023, "learning_rate": 5e-06, "loss": 0.6718, "num_input_tokens_seen": 110359480, "step": 1761 }, { "epoch": 5.860232945091514, "loss": 0.7827401161193848, "loss_ce": 2.530876372475177e-05, "loss_iou": 0.306640625, "loss_num": 0.033935546875, "loss_xval": 0.78125, "num_input_tokens_seen": 110359480, "step": 1761 }, { "epoch": 5.863560732113145, "grad_norm": 25.800317764282227, "learning_rate": 5e-06, "loss": 0.764, "num_input_tokens_seen": 110421012, "step": 1762 }, { "epoch": 5.863560732113145, "loss": 0.8109021186828613, "loss_ce": 0.0003552451671566814, "loss_iou": 0.27734375, "loss_num": 0.05126953125, "loss_xval": 0.8125, "num_input_tokens_seen": 110421012, "step": 1762 }, { "epoch": 5.8668885191347755, "grad_norm": 13.869083404541016, "learning_rate": 5e-06, "loss": 0.6566, "num_input_tokens_seen": 110484348, "step": 1763 }, { "epoch": 5.8668885191347755, "loss": 0.7028859257698059, "loss_ce": 5.098950623505516e-06, "loss_iou": 0.2177734375, "loss_num": 0.053466796875, "loss_xval": 0.703125, "num_input_tokens_seen": 110484348, "step": 1763 }, { "epoch": 5.870216306156406, "grad_norm": 11.48287296295166, "learning_rate": 5e-06, "loss": 0.7074, "num_input_tokens_seen": 110547872, "step": 1764 }, { "epoch": 5.870216306156406, "loss": 0.8577039241790771, "loss_ce": 0.0011365680256858468, "loss_iou": 0.32421875, "loss_num": 0.042236328125, "loss_xval": 0.85546875, "num_input_tokens_seen": 110547872, "step": 1764 }, { "epoch": 5.873544093178037, "grad_norm": 14.099336624145508, "learning_rate": 5e-06, "loss": 0.6525, "num_input_tokens_seen": 110611396, "step": 1765 }, { "epoch": 5.873544093178037, "loss": 0.7531905174255371, "loss_ce": 0.0022139903157949448, "loss_iou": 0.236328125, "loss_num": 0.0556640625, "loss_xval": 0.75, "num_input_tokens_seen": 110611396, "step": 1765 }, { "epoch": 5.876871880199667, "grad_norm": 17.61534309387207, "learning_rate": 5e-06, "loss": 0.8037, "num_input_tokens_seen": 110673936, "step": 1766 }, { "epoch": 5.876871880199667, "loss": 0.6117116808891296, "loss_ce": 1.73293192347046e-05, "loss_iou": 0.21875, "loss_num": 0.034912109375, "loss_xval": 0.61328125, "num_input_tokens_seen": 110673936, "step": 1766 }, { "epoch": 5.880199667221298, "grad_norm": 31.511371612548828, "learning_rate": 5e-06, "loss": 0.7188, "num_input_tokens_seen": 110737000, "step": 1767 }, { "epoch": 5.880199667221298, "loss": 1.0573480129241943, "loss_ce": 0.0015619065379723907, "loss_iou": 0.32421875, "loss_num": 0.08154296875, "loss_xval": 1.0546875, "num_input_tokens_seen": 110737000, "step": 1767 }, { "epoch": 5.883527454242929, "grad_norm": 35.806556701660156, "learning_rate": 5e-06, "loss": 0.505, "num_input_tokens_seen": 110800016, "step": 1768 }, { "epoch": 5.883527454242929, "loss": 0.48276591300964355, "loss_ce": 0.0013205973664298654, "loss_iou": 0.15625, "loss_num": 0.033935546875, "loss_xval": 0.48046875, "num_input_tokens_seen": 110800016, "step": 1768 }, { "epoch": 5.886855241264559, "grad_norm": 25.79120635986328, "learning_rate": 5e-06, "loss": 0.5601, "num_input_tokens_seen": 110864032, "step": 1769 }, { "epoch": 5.886855241264559, "loss": 0.5003718137741089, "loss_ce": 0.0004939304199069738, "loss_iou": 0.1640625, "loss_num": 0.034423828125, "loss_xval": 0.5, "num_input_tokens_seen": 110864032, "step": 1769 }, { "epoch": 5.89018302828619, "grad_norm": 9.691274642944336, "learning_rate": 5e-06, "loss": 0.5045, "num_input_tokens_seen": 110925816, "step": 1770 }, { "epoch": 5.89018302828619, "loss": 0.4838283658027649, "loss_ce": 3.321943586342968e-05, "loss_iou": 0.1826171875, "loss_num": 0.0235595703125, "loss_xval": 0.484375, "num_input_tokens_seen": 110925816, "step": 1770 }, { "epoch": 5.89351081530782, "grad_norm": 12.821023941040039, "learning_rate": 5e-06, "loss": 0.6885, "num_input_tokens_seen": 110989460, "step": 1771 }, { "epoch": 5.89351081530782, "loss": 0.5637482404708862, "loss_ce": 0.0002716918243095279, "loss_iou": 0.1845703125, "loss_num": 0.03857421875, "loss_xval": 0.5625, "num_input_tokens_seen": 110989460, "step": 1771 }, { "epoch": 5.896838602329451, "grad_norm": 9.636116981506348, "learning_rate": 5e-06, "loss": 0.444, "num_input_tokens_seen": 111052032, "step": 1772 }, { "epoch": 5.896838602329451, "loss": 0.4734896123409271, "loss_ce": 0.0005892139161005616, "loss_iou": 0.173828125, "loss_num": 0.025146484375, "loss_xval": 0.47265625, "num_input_tokens_seen": 111052032, "step": 1772 }, { "epoch": 5.900166389351082, "grad_norm": 18.86623764038086, "learning_rate": 5e-06, "loss": 0.7722, "num_input_tokens_seen": 111115768, "step": 1773 }, { "epoch": 5.900166389351082, "loss": 0.7136220932006836, "loss_ce": 0.0007315138936974108, "loss_iou": 0.263671875, "loss_num": 0.036865234375, "loss_xval": 0.7109375, "num_input_tokens_seen": 111115768, "step": 1773 }, { "epoch": 5.903494176372712, "grad_norm": 13.020090103149414, "learning_rate": 5e-06, "loss": 0.6273, "num_input_tokens_seen": 111179252, "step": 1774 }, { "epoch": 5.903494176372712, "loss": 0.5287013053894043, "loss_ce": 1.4801251381868497e-05, "loss_iou": 0.166015625, "loss_num": 0.0390625, "loss_xval": 0.52734375, "num_input_tokens_seen": 111179252, "step": 1774 }, { "epoch": 5.906821963394343, "grad_norm": 23.916812896728516, "learning_rate": 5e-06, "loss": 0.5319, "num_input_tokens_seen": 111241816, "step": 1775 }, { "epoch": 5.906821963394343, "loss": 0.443467378616333, "loss_ce": 0.0006573410355485976, "loss_iou": 0.1455078125, "loss_num": 0.030517578125, "loss_xval": 0.443359375, "num_input_tokens_seen": 111241816, "step": 1775 }, { "epoch": 5.9101497504159735, "grad_norm": 19.583410263061523, "learning_rate": 5e-06, "loss": 0.5933, "num_input_tokens_seen": 111304188, "step": 1776 }, { "epoch": 5.9101497504159735, "loss": 0.5054777264595032, "loss_ce": 0.00010664336150512099, "loss_iou": 0.154296875, "loss_num": 0.0390625, "loss_xval": 0.50390625, "num_input_tokens_seen": 111304188, "step": 1776 }, { "epoch": 5.913477537437604, "grad_norm": 13.995909690856934, "learning_rate": 5e-06, "loss": 0.7816, "num_input_tokens_seen": 111364944, "step": 1777 }, { "epoch": 5.913477537437604, "loss": 1.0646007061004639, "loss_ce": 0.0003917526628356427, "loss_iou": 0.361328125, "loss_num": 0.06884765625, "loss_xval": 1.0625, "num_input_tokens_seen": 111364944, "step": 1777 }, { "epoch": 5.916805324459235, "grad_norm": 9.358750343322754, "learning_rate": 5e-06, "loss": 0.5087, "num_input_tokens_seen": 111427100, "step": 1778 }, { "epoch": 5.916805324459235, "loss": 0.7140218615531921, "loss_ce": 0.0008870940655469894, "loss_iou": 0.2734375, "loss_num": 0.033447265625, "loss_xval": 0.71484375, "num_input_tokens_seen": 111427100, "step": 1778 }, { "epoch": 5.920133111480865, "grad_norm": 13.724045753479004, "learning_rate": 5e-06, "loss": 0.7266, "num_input_tokens_seen": 111490448, "step": 1779 }, { "epoch": 5.920133111480865, "loss": 0.6259258985519409, "loss_ce": 1.0390387615188956e-05, "loss_iou": 0.212890625, "loss_num": 0.040283203125, "loss_xval": 0.625, "num_input_tokens_seen": 111490448, "step": 1779 }, { "epoch": 5.923460898502496, "grad_norm": 10.216179847717285, "learning_rate": 5e-06, "loss": 0.6562, "num_input_tokens_seen": 111554604, "step": 1780 }, { "epoch": 5.923460898502496, "loss": 0.7411832809448242, "loss_ce": 0.00046060560271143913, "loss_iou": 0.30859375, "loss_num": 0.024658203125, "loss_xval": 0.7421875, "num_input_tokens_seen": 111554604, "step": 1780 }, { "epoch": 5.9267886855241265, "grad_norm": 17.054357528686523, "learning_rate": 5e-06, "loss": 0.7283, "num_input_tokens_seen": 111617796, "step": 1781 }, { "epoch": 5.9267886855241265, "loss": 0.9874345064163208, "loss_ce": 0.0007401675684377551, "loss_iou": 0.380859375, "loss_num": 0.044677734375, "loss_xval": 0.98828125, "num_input_tokens_seen": 111617796, "step": 1781 }, { "epoch": 5.930116472545757, "grad_norm": 14.132950782775879, "learning_rate": 5e-06, "loss": 0.8466, "num_input_tokens_seen": 111679904, "step": 1782 }, { "epoch": 5.930116472545757, "loss": 0.9425029754638672, "loss_ce": 0.0010967559646815062, "loss_iou": 0.35546875, "loss_num": 0.046630859375, "loss_xval": 0.94140625, "num_input_tokens_seen": 111679904, "step": 1782 }, { "epoch": 5.933444259567388, "grad_norm": 18.246789932250977, "learning_rate": 5e-06, "loss": 0.7549, "num_input_tokens_seen": 111743964, "step": 1783 }, { "epoch": 5.933444259567388, "loss": 0.9670125842094421, "loss_ce": 0.00021572083642240614, "loss_iou": 0.31640625, "loss_num": 0.06640625, "loss_xval": 0.96875, "num_input_tokens_seen": 111743964, "step": 1783 }, { "epoch": 5.936772046589018, "grad_norm": 18.004018783569336, "learning_rate": 5e-06, "loss": 0.7019, "num_input_tokens_seen": 111805552, "step": 1784 }, { "epoch": 5.936772046589018, "loss": 0.8806197643280029, "loss_ce": 4.619309947884176e-06, "loss_iou": 0.322265625, "loss_num": 0.047119140625, "loss_xval": 0.87890625, "num_input_tokens_seen": 111805552, "step": 1784 }, { "epoch": 5.940099833610649, "grad_norm": 10.934527397155762, "learning_rate": 5e-06, "loss": 0.5895, "num_input_tokens_seen": 111869060, "step": 1785 }, { "epoch": 5.940099833610649, "loss": 0.5993823409080505, "loss_ce": 1.712212906568311e-05, "loss_iou": 0.2197265625, "loss_num": 0.031982421875, "loss_xval": 0.59765625, "num_input_tokens_seen": 111869060, "step": 1785 }, { "epoch": 5.94342762063228, "grad_norm": 11.646177291870117, "learning_rate": 5e-06, "loss": 0.7022, "num_input_tokens_seen": 111932476, "step": 1786 }, { "epoch": 5.94342762063228, "loss": 0.7437913417816162, "loss_ce": 1.6888494428712875e-05, "loss_iou": 0.298828125, "loss_num": 0.0294189453125, "loss_xval": 0.7421875, "num_input_tokens_seen": 111932476, "step": 1786 }, { "epoch": 5.94675540765391, "grad_norm": 9.77434253692627, "learning_rate": 5e-06, "loss": 0.3784, "num_input_tokens_seen": 111995188, "step": 1787 }, { "epoch": 5.94675540765391, "loss": 0.3339497745037079, "loss_ce": 0.00014850017032586038, "loss_iou": 0.1240234375, "loss_num": 0.0172119140625, "loss_xval": 0.333984375, "num_input_tokens_seen": 111995188, "step": 1787 }, { "epoch": 5.950083194675541, "grad_norm": 9.4547700881958, "learning_rate": 5e-06, "loss": 0.6886, "num_input_tokens_seen": 112059688, "step": 1788 }, { "epoch": 5.950083194675541, "loss": 0.7533557415008545, "loss_ce": 0.0010364109184592962, "loss_iou": 0.251953125, "loss_num": 0.0498046875, "loss_xval": 0.75390625, "num_input_tokens_seen": 112059688, "step": 1788 }, { "epoch": 5.953410981697171, "grad_norm": 10.41918659210205, "learning_rate": 5e-06, "loss": 0.4527, "num_input_tokens_seen": 112122556, "step": 1789 }, { "epoch": 5.953410981697171, "loss": 0.4796237349510193, "loss_ce": 9.511190000921488e-06, "loss_iou": 0.1328125, "loss_num": 0.042724609375, "loss_xval": 0.48046875, "num_input_tokens_seen": 112122556, "step": 1789 }, { "epoch": 5.956738768718802, "grad_norm": 7.984902381896973, "learning_rate": 5e-06, "loss": 0.6002, "num_input_tokens_seen": 112184820, "step": 1790 }, { "epoch": 5.956738768718802, "loss": 0.5412213206291199, "loss_ce": 0.0007550385198555887, "loss_iou": 0.1865234375, "loss_num": 0.033447265625, "loss_xval": 0.5390625, "num_input_tokens_seen": 112184820, "step": 1790 }, { "epoch": 5.960066555740433, "grad_norm": 10.358345031738281, "learning_rate": 5e-06, "loss": 0.6859, "num_input_tokens_seen": 112246916, "step": 1791 }, { "epoch": 5.960066555740433, "loss": 0.5734908580780029, "loss_ce": 4.511678980634315e-06, "loss_iou": 0.1396484375, "loss_num": 0.058837890625, "loss_xval": 0.57421875, "num_input_tokens_seen": 112246916, "step": 1791 }, { "epoch": 5.963394342762063, "grad_norm": 9.835028648376465, "learning_rate": 5e-06, "loss": 0.7321, "num_input_tokens_seen": 112309796, "step": 1792 }, { "epoch": 5.963394342762063, "loss": 0.707068681716919, "loss_ce": 3.736479629878886e-05, "loss_iou": 0.2265625, "loss_num": 0.05078125, "loss_xval": 0.70703125, "num_input_tokens_seen": 112309796, "step": 1792 }, { "epoch": 5.966722129783694, "grad_norm": 8.542290687561035, "learning_rate": 5e-06, "loss": 0.771, "num_input_tokens_seen": 112372752, "step": 1793 }, { "epoch": 5.966722129783694, "loss": 0.784670352935791, "loss_ce": 0.00036859206738881767, "loss_iou": 0.28125, "loss_num": 0.044189453125, "loss_xval": 0.78515625, "num_input_tokens_seen": 112372752, "step": 1793 }, { "epoch": 5.9700499168053245, "grad_norm": 10.128270149230957, "learning_rate": 5e-06, "loss": 0.6849, "num_input_tokens_seen": 112435784, "step": 1794 }, { "epoch": 5.9700499168053245, "loss": 0.7285070419311523, "loss_ce": 5.2509160013869405e-05, "loss_iou": 0.2353515625, "loss_num": 0.0517578125, "loss_xval": 0.7265625, "num_input_tokens_seen": 112435784, "step": 1794 }, { "epoch": 5.973377703826955, "grad_norm": 21.481040954589844, "learning_rate": 5e-06, "loss": 0.7537, "num_input_tokens_seen": 112499088, "step": 1795 }, { "epoch": 5.973377703826955, "loss": 0.4841929078102112, "loss_ce": 0.000794428342487663, "loss_iou": 0.158203125, "loss_num": 0.033447265625, "loss_xval": 0.484375, "num_input_tokens_seen": 112499088, "step": 1795 }, { "epoch": 5.976705490848586, "grad_norm": 17.77248764038086, "learning_rate": 5e-06, "loss": 0.5665, "num_input_tokens_seen": 112562248, "step": 1796 }, { "epoch": 5.976705490848586, "loss": 0.34294500946998596, "loss_ce": 0.0002020865649683401, "loss_iou": 0.1142578125, "loss_num": 0.0228271484375, "loss_xval": 0.341796875, "num_input_tokens_seen": 112562248, "step": 1796 }, { "epoch": 5.980033277870216, "grad_norm": 9.828653335571289, "learning_rate": 5e-06, "loss": 0.615, "num_input_tokens_seen": 112624292, "step": 1797 }, { "epoch": 5.980033277870216, "loss": 0.8139055371284485, "loss_ce": 0.0006731028552167118, "loss_iou": 0.27734375, "loss_num": 0.052001953125, "loss_xval": 0.8125, "num_input_tokens_seen": 112624292, "step": 1797 }, { "epoch": 5.983361064891847, "grad_norm": 10.219780921936035, "learning_rate": 5e-06, "loss": 0.5873, "num_input_tokens_seen": 112687100, "step": 1798 }, { "epoch": 5.983361064891847, "loss": 0.6688265204429626, "loss_ce": 3.2744080726843094e-06, "loss_iou": 0.193359375, "loss_num": 0.056640625, "loss_xval": 0.66796875, "num_input_tokens_seen": 112687100, "step": 1798 }, { "epoch": 5.9866888519134775, "grad_norm": 17.44321632385254, "learning_rate": 5e-06, "loss": 0.6239, "num_input_tokens_seen": 112748904, "step": 1799 }, { "epoch": 5.9866888519134775, "loss": 0.7041700482368469, "loss_ce": 0.0011061305413022637, "loss_iou": 0.29296875, "loss_num": 0.02294921875, "loss_xval": 0.703125, "num_input_tokens_seen": 112748904, "step": 1799 }, { "epoch": 5.990016638935108, "grad_norm": 20.630294799804688, "learning_rate": 5e-06, "loss": 0.7101, "num_input_tokens_seen": 112810924, "step": 1800 }, { "epoch": 5.990016638935108, "loss": 0.6245334148406982, "loss_ce": 0.00014378642663359642, "loss_iou": 0.22265625, "loss_num": 0.035888671875, "loss_xval": 0.625, "num_input_tokens_seen": 112810924, "step": 1800 }, { "epoch": 5.993344425956739, "grad_norm": 17.083229064941406, "learning_rate": 5e-06, "loss": 0.686, "num_input_tokens_seen": 112875104, "step": 1801 }, { "epoch": 5.993344425956739, "loss": 0.7393364310264587, "loss_ce": 0.0003227871493436396, "loss_iou": 0.275390625, "loss_num": 0.03759765625, "loss_xval": 0.73828125, "num_input_tokens_seen": 112875104, "step": 1801 }, { "epoch": 5.996672212978369, "grad_norm": 5.339547157287598, "learning_rate": 5e-06, "loss": 0.4334, "num_input_tokens_seen": 112937884, "step": 1802 }, { "epoch": 5.996672212978369, "loss": 0.4524919390678406, "loss_ce": 0.00022140935470815748, "loss_iou": 0.15234375, "loss_num": 0.0296630859375, "loss_xval": 0.453125, "num_input_tokens_seen": 112937884, "step": 1802 }, { "epoch": 6.0, "grad_norm": 12.052851676940918, "learning_rate": 5e-06, "loss": 0.5541, "num_input_tokens_seen": 113000836, "step": 1803 }, { "epoch": 6.0, "loss": 0.6314157247543335, "loss_ce": 0.00012909869838040322, "loss_iou": 0.212890625, "loss_num": 0.041015625, "loss_xval": 0.6328125, "num_input_tokens_seen": 113000836, "step": 1803 }, { "epoch": 6.003327787021631, "grad_norm": 17.944114685058594, "learning_rate": 5e-06, "loss": 0.6348, "num_input_tokens_seen": 113064196, "step": 1804 }, { "epoch": 6.003327787021631, "loss": 0.6592069864273071, "loss_ce": 2.7252650397713296e-05, "loss_iou": 0.24609375, "loss_num": 0.033203125, "loss_xval": 0.66015625, "num_input_tokens_seen": 113064196, "step": 1804 }, { "epoch": 6.006655574043261, "grad_norm": 8.979909896850586, "learning_rate": 5e-06, "loss": 0.4955, "num_input_tokens_seen": 113125704, "step": 1805 }, { "epoch": 6.006655574043261, "loss": 0.5311020612716675, "loss_ce": 0.0003403459268156439, "loss_iou": 0.1611328125, "loss_num": 0.04150390625, "loss_xval": 0.53125, "num_input_tokens_seen": 113125704, "step": 1805 }, { "epoch": 6.009983361064892, "grad_norm": 23.03765869140625, "learning_rate": 5e-06, "loss": 0.5458, "num_input_tokens_seen": 113189092, "step": 1806 }, { "epoch": 6.009983361064892, "loss": 0.6970461010932922, "loss_ce": 0.00026878988137468696, "loss_iou": 0.2265625, "loss_num": 0.048828125, "loss_xval": 0.6953125, "num_input_tokens_seen": 113189092, "step": 1806 }, { "epoch": 6.0133111480865225, "grad_norm": 40.743404388427734, "learning_rate": 5e-06, "loss": 0.7297, "num_input_tokens_seen": 113252312, "step": 1807 }, { "epoch": 6.0133111480865225, "loss": 0.9480460286140442, "loss_ce": 0.0007804275956004858, "loss_iou": 0.345703125, "loss_num": 0.05126953125, "loss_xval": 0.9453125, "num_input_tokens_seen": 113252312, "step": 1807 }, { "epoch": 6.016638935108153, "grad_norm": 24.678939819335938, "learning_rate": 5e-06, "loss": 0.4995, "num_input_tokens_seen": 113315144, "step": 1808 }, { "epoch": 6.016638935108153, "loss": 0.6151464581489563, "loss_ce": 0.00015620810154359788, "loss_iou": 0.255859375, "loss_num": 0.020751953125, "loss_xval": 0.61328125, "num_input_tokens_seen": 113315144, "step": 1808 }, { "epoch": 6.019966722129784, "grad_norm": 8.93970775604248, "learning_rate": 5e-06, "loss": 0.6358, "num_input_tokens_seen": 113378196, "step": 1809 }, { "epoch": 6.019966722129784, "loss": 0.4480162262916565, "loss_ce": 0.0001402330381097272, "loss_iou": 0.169921875, "loss_num": 0.0216064453125, "loss_xval": 0.447265625, "num_input_tokens_seen": 113378196, "step": 1809 }, { "epoch": 6.023294509151414, "grad_norm": 6.139615058898926, "learning_rate": 5e-06, "loss": 0.3159, "num_input_tokens_seen": 113438784, "step": 1810 }, { "epoch": 6.023294509151414, "loss": 0.3206220269203186, "loss_ce": 4.330090632720385e-06, "loss_iou": 0.07421875, "loss_num": 0.034423828125, "loss_xval": 0.3203125, "num_input_tokens_seen": 113438784, "step": 1810 }, { "epoch": 6.026622296173045, "grad_norm": 25.28673553466797, "learning_rate": 5e-06, "loss": 0.5646, "num_input_tokens_seen": 113501584, "step": 1811 }, { "epoch": 6.026622296173045, "loss": 0.47726649045944214, "loss_ce": 0.0009481181623414159, "loss_iou": 0.1396484375, "loss_num": 0.03955078125, "loss_xval": 0.4765625, "num_input_tokens_seen": 113501584, "step": 1811 }, { "epoch": 6.0299500831946755, "grad_norm": 23.174034118652344, "learning_rate": 5e-06, "loss": 0.6892, "num_input_tokens_seen": 113564772, "step": 1812 }, { "epoch": 6.0299500831946755, "loss": 0.638823390007019, "loss_ce": 0.0003956563596148044, "loss_iou": 0.2197265625, "loss_num": 0.039794921875, "loss_xval": 0.63671875, "num_input_tokens_seen": 113564772, "step": 1812 }, { "epoch": 6.033277870216306, "grad_norm": 9.570660591125488, "learning_rate": 5e-06, "loss": 0.482, "num_input_tokens_seen": 113627164, "step": 1813 }, { "epoch": 6.033277870216306, "loss": 0.5443622469902039, "loss_ce": 0.00017276505241170526, "loss_iou": 0.2158203125, "loss_num": 0.0224609375, "loss_xval": 0.54296875, "num_input_tokens_seen": 113627164, "step": 1813 }, { "epoch": 6.036605657237937, "grad_norm": 15.14444351196289, "learning_rate": 5e-06, "loss": 0.567, "num_input_tokens_seen": 113690112, "step": 1814 }, { "epoch": 6.036605657237937, "loss": 0.4782255291938782, "loss_ce": 1.5094023183337413e-05, "loss_iou": 0.1875, "loss_num": 0.0206298828125, "loss_xval": 0.478515625, "num_input_tokens_seen": 113690112, "step": 1814 }, { "epoch": 6.039933444259567, "grad_norm": 13.660589218139648, "learning_rate": 5e-06, "loss": 0.617, "num_input_tokens_seen": 113753248, "step": 1815 }, { "epoch": 6.039933444259567, "loss": 0.7184145450592041, "loss_ce": 3.077809378737584e-05, "loss_iou": 0.265625, "loss_num": 0.037353515625, "loss_xval": 0.71875, "num_input_tokens_seen": 113753248, "step": 1815 }, { "epoch": 6.043261231281198, "grad_norm": 9.653757095336914, "learning_rate": 5e-06, "loss": 0.9097, "num_input_tokens_seen": 113817632, "step": 1816 }, { "epoch": 6.043261231281198, "loss": 1.0042712688446045, "loss_ce": 0.0004870828415732831, "loss_iou": 0.33984375, "loss_num": 0.064453125, "loss_xval": 1.0, "num_input_tokens_seen": 113817632, "step": 1816 }, { "epoch": 6.046589018302829, "grad_norm": 10.754157066345215, "learning_rate": 5e-06, "loss": 0.791, "num_input_tokens_seen": 113880676, "step": 1817 }, { "epoch": 6.046589018302829, "loss": 0.6697897911071777, "loss_ce": 0.00011203553731320426, "loss_iou": 0.25390625, "loss_num": 0.03271484375, "loss_xval": 0.66796875, "num_input_tokens_seen": 113880676, "step": 1817 }, { "epoch": 6.049916805324459, "grad_norm": 12.63485336303711, "learning_rate": 5e-06, "loss": 0.5059, "num_input_tokens_seen": 113941992, "step": 1818 }, { "epoch": 6.049916805324459, "loss": 0.4937788248062134, "loss_ce": 4.377495315566193e-06, "loss_iou": 0.13671875, "loss_num": 0.044189453125, "loss_xval": 0.494140625, "num_input_tokens_seen": 113941992, "step": 1818 }, { "epoch": 6.05324459234609, "grad_norm": 6.852193832397461, "learning_rate": 5e-06, "loss": 0.5946, "num_input_tokens_seen": 114003684, "step": 1819 }, { "epoch": 6.05324459234609, "loss": 0.6906334161758423, "loss_ce": 0.0008140857680700719, "loss_iou": 0.259765625, "loss_num": 0.033935546875, "loss_xval": 0.69140625, "num_input_tokens_seen": 114003684, "step": 1819 }, { "epoch": 6.05657237936772, "grad_norm": 14.351150512695312, "learning_rate": 5e-06, "loss": 0.7158, "num_input_tokens_seen": 114065336, "step": 1820 }, { "epoch": 6.05657237936772, "loss": 0.7381648421287537, "loss_ce": 5.69224130231305e-06, "loss_iou": 0.2421875, "loss_num": 0.051025390625, "loss_xval": 0.73828125, "num_input_tokens_seen": 114065336, "step": 1820 }, { "epoch": 6.059900166389351, "grad_norm": 30.204614639282227, "learning_rate": 5e-06, "loss": 0.6653, "num_input_tokens_seen": 114127604, "step": 1821 }, { "epoch": 6.059900166389351, "loss": 0.6157270073890686, "loss_ce": 0.0010419311001896858, "loss_iou": 0.2265625, "loss_num": 0.0322265625, "loss_xval": 0.61328125, "num_input_tokens_seen": 114127604, "step": 1821 }, { "epoch": 6.063227953410982, "grad_norm": 9.658365249633789, "learning_rate": 5e-06, "loss": 0.5936, "num_input_tokens_seen": 114190672, "step": 1822 }, { "epoch": 6.063227953410982, "loss": 0.6472856402397156, "loss_ce": 0.0008012944017536938, "loss_iou": 0.23046875, "loss_num": 0.037109375, "loss_xval": 0.6484375, "num_input_tokens_seen": 114190672, "step": 1822 }, { "epoch": 6.066555740432612, "grad_norm": 18.5205135345459, "learning_rate": 5e-06, "loss": 0.6124, "num_input_tokens_seen": 114253368, "step": 1823 }, { "epoch": 6.066555740432612, "loss": 0.735488772392273, "loss_ce": 0.0006254613981582224, "loss_iou": 0.2314453125, "loss_num": 0.054443359375, "loss_xval": 0.734375, "num_input_tokens_seen": 114253368, "step": 1823 }, { "epoch": 6.069883527454243, "grad_norm": 9.273344993591309, "learning_rate": 5e-06, "loss": 0.8266, "num_input_tokens_seen": 114314064, "step": 1824 }, { "epoch": 6.069883527454243, "loss": 0.7096292972564697, "loss_ce": 0.0001566863793414086, "loss_iou": 0.263671875, "loss_num": 0.0361328125, "loss_xval": 0.7109375, "num_input_tokens_seen": 114314064, "step": 1824 }, { "epoch": 6.0732113144758735, "grad_norm": 10.49995231628418, "learning_rate": 5e-06, "loss": 0.4339, "num_input_tokens_seen": 114374024, "step": 1825 }, { "epoch": 6.0732113144758735, "loss": 0.4444683790206909, "loss_ce": 1.035763489198871e-05, "loss_iou": 0.10302734375, "loss_num": 0.047607421875, "loss_xval": 0.4453125, "num_input_tokens_seen": 114374024, "step": 1825 }, { "epoch": 6.076539101497504, "grad_norm": 30.338027954101562, "learning_rate": 5e-06, "loss": 0.6147, "num_input_tokens_seen": 114437368, "step": 1826 }, { "epoch": 6.076539101497504, "loss": 0.7704422473907471, "loss_ce": 0.0011551063507795334, "loss_iou": 0.287109375, "loss_num": 0.038818359375, "loss_xval": 0.76953125, "num_input_tokens_seen": 114437368, "step": 1826 }, { "epoch": 6.079866888519135, "grad_norm": 21.672454833984375, "learning_rate": 5e-06, "loss": 0.69, "num_input_tokens_seen": 114500248, "step": 1827 }, { "epoch": 6.079866888519135, "loss": 0.42282021045684814, "loss_ce": 0.0004569635493680835, "loss_iou": 0.140625, "loss_num": 0.0283203125, "loss_xval": 0.421875, "num_input_tokens_seen": 114500248, "step": 1827 }, { "epoch": 6.083194675540765, "grad_norm": 12.467771530151367, "learning_rate": 5e-06, "loss": 0.5934, "num_input_tokens_seen": 114562540, "step": 1828 }, { "epoch": 6.083194675540765, "loss": 0.4341202676296234, "loss_ce": 0.0006791083724237978, "loss_iou": 0.134765625, "loss_num": 0.03271484375, "loss_xval": 0.43359375, "num_input_tokens_seen": 114562540, "step": 1828 }, { "epoch": 6.086522462562396, "grad_norm": 11.479426383972168, "learning_rate": 5e-06, "loss": 0.6028, "num_input_tokens_seen": 114624740, "step": 1829 }, { "epoch": 6.086522462562396, "loss": 0.5540913343429565, "loss_ce": 1.4216830095392652e-05, "loss_iou": 0.212890625, "loss_num": 0.025634765625, "loss_xval": 0.5546875, "num_input_tokens_seen": 114624740, "step": 1829 }, { "epoch": 6.0898502495840265, "grad_norm": 14.962776184082031, "learning_rate": 5e-06, "loss": 0.7424, "num_input_tokens_seen": 114689968, "step": 1830 }, { "epoch": 6.0898502495840265, "loss": 0.5499890446662903, "loss_ce": 0.000794704130385071, "loss_iou": 0.189453125, "loss_num": 0.033935546875, "loss_xval": 0.55078125, "num_input_tokens_seen": 114689968, "step": 1830 }, { "epoch": 6.093178036605657, "grad_norm": 46.97150421142578, "learning_rate": 5e-06, "loss": 0.8063, "num_input_tokens_seen": 114755060, "step": 1831 }, { "epoch": 6.093178036605657, "loss": 0.8358315825462341, "loss_ce": 0.0001382491027470678, "loss_iou": 0.27734375, "loss_num": 0.056640625, "loss_xval": 0.8359375, "num_input_tokens_seen": 114755060, "step": 1831 }, { "epoch": 6.096505823627288, "grad_norm": 24.191722869873047, "learning_rate": 5e-06, "loss": 0.5722, "num_input_tokens_seen": 114817492, "step": 1832 }, { "epoch": 6.096505823627288, "loss": 0.5860408544540405, "loss_ce": 0.00022541148064192384, "loss_iou": 0.216796875, "loss_num": 0.03076171875, "loss_xval": 0.5859375, "num_input_tokens_seen": 114817492, "step": 1832 }, { "epoch": 6.099833610648918, "grad_norm": 12.793700218200684, "learning_rate": 5e-06, "loss": 0.7779, "num_input_tokens_seen": 114881384, "step": 1833 }, { "epoch": 6.099833610648918, "loss": 0.7877440452575684, "loss_ce": 0.0002685006766114384, "loss_iou": 0.279296875, "loss_num": 0.045654296875, "loss_xval": 0.7890625, "num_input_tokens_seen": 114881384, "step": 1833 }, { "epoch": 6.103161397670549, "grad_norm": 11.71887493133545, "learning_rate": 5e-06, "loss": 0.581, "num_input_tokens_seen": 114943416, "step": 1834 }, { "epoch": 6.103161397670549, "loss": 0.3188535273075104, "loss_ce": 5.8923419601342175e-06, "loss_iou": 0.0625, "loss_num": 0.038818359375, "loss_xval": 0.318359375, "num_input_tokens_seen": 114943416, "step": 1834 }, { "epoch": 6.10648918469218, "grad_norm": 8.89264965057373, "learning_rate": 5e-06, "loss": 0.5685, "num_input_tokens_seen": 115005544, "step": 1835 }, { "epoch": 6.10648918469218, "loss": 0.5760512351989746, "loss_ce": 1.4534259662468685e-06, "loss_iou": 0.1689453125, "loss_num": 0.047607421875, "loss_xval": 0.57421875, "num_input_tokens_seen": 115005544, "step": 1835 }, { "epoch": 6.10981697171381, "grad_norm": 23.207515716552734, "learning_rate": 5e-06, "loss": 0.6417, "num_input_tokens_seen": 115066228, "step": 1836 }, { "epoch": 6.10981697171381, "loss": 0.5505459308624268, "loss_ce": 0.001595688983798027, "loss_iou": 0.2216796875, "loss_num": 0.02099609375, "loss_xval": 0.55078125, "num_input_tokens_seen": 115066228, "step": 1836 }, { "epoch": 6.113144758735441, "grad_norm": 44.30556106567383, "learning_rate": 5e-06, "loss": 0.8094, "num_input_tokens_seen": 115130620, "step": 1837 }, { "epoch": 6.113144758735441, "loss": 0.7348802089691162, "loss_ce": 0.0002610796072985977, "loss_iou": 0.271484375, "loss_num": 0.0380859375, "loss_xval": 0.734375, "num_input_tokens_seen": 115130620, "step": 1837 }, { "epoch": 6.116472545757071, "grad_norm": 24.386632919311523, "learning_rate": 5e-06, "loss": 0.6976, "num_input_tokens_seen": 115192732, "step": 1838 }, { "epoch": 6.116472545757071, "loss": 0.7894414663314819, "loss_ce": 0.0007451603887602687, "loss_iou": 0.283203125, "loss_num": 0.044677734375, "loss_xval": 0.7890625, "num_input_tokens_seen": 115192732, "step": 1838 }, { "epoch": 6.119800332778702, "grad_norm": 13.562687873840332, "learning_rate": 5e-06, "loss": 0.6654, "num_input_tokens_seen": 115257288, "step": 1839 }, { "epoch": 6.119800332778702, "loss": 0.803615152835846, "loss_ce": 0.0008807668928056955, "loss_iou": 0.33984375, "loss_num": 0.024169921875, "loss_xval": 0.8046875, "num_input_tokens_seen": 115257288, "step": 1839 }, { "epoch": 6.123128119800333, "grad_norm": 15.4951753616333, "learning_rate": 5e-06, "loss": 0.7436, "num_input_tokens_seen": 115321772, "step": 1840 }, { "epoch": 6.123128119800333, "loss": 0.7024877071380615, "loss_ce": 0.0003393086954019964, "loss_iou": 0.26953125, "loss_num": 0.032958984375, "loss_xval": 0.703125, "num_input_tokens_seen": 115321772, "step": 1840 }, { "epoch": 6.126455906821963, "grad_norm": 16.896167755126953, "learning_rate": 5e-06, "loss": 0.5496, "num_input_tokens_seen": 115383640, "step": 1841 }, { "epoch": 6.126455906821963, "loss": 0.6101754903793335, "loss_ce": 0.0001900848001241684, "loss_iou": 0.1611328125, "loss_num": 0.057373046875, "loss_xval": 0.609375, "num_input_tokens_seen": 115383640, "step": 1841 }, { "epoch": 6.129783693843594, "grad_norm": 25.171308517456055, "learning_rate": 5e-06, "loss": 0.5562, "num_input_tokens_seen": 115445032, "step": 1842 }, { "epoch": 6.129783693843594, "loss": 0.6438261270523071, "loss_ce": 0.0011869219597429037, "loss_iou": 0.185546875, "loss_num": 0.05419921875, "loss_xval": 0.64453125, "num_input_tokens_seen": 115445032, "step": 1842 }, { "epoch": 6.1331114808652245, "grad_norm": 17.913129806518555, "learning_rate": 5e-06, "loss": 0.3981, "num_input_tokens_seen": 115506208, "step": 1843 }, { "epoch": 6.1331114808652245, "loss": 0.3706634044647217, "loss_ce": 5.7946737797465175e-05, "loss_iou": 0.1416015625, "loss_num": 0.0174560546875, "loss_xval": 0.37109375, "num_input_tokens_seen": 115506208, "step": 1843 }, { "epoch": 6.136439267886855, "grad_norm": 11.869161605834961, "learning_rate": 5e-06, "loss": 0.5245, "num_input_tokens_seen": 115568952, "step": 1844 }, { "epoch": 6.136439267886855, "loss": 0.6334899663925171, "loss_ce": 0.0007995049236342311, "loss_iou": 0.21484375, "loss_num": 0.040283203125, "loss_xval": 0.6328125, "num_input_tokens_seen": 115568952, "step": 1844 }, { "epoch": 6.139767054908486, "grad_norm": 19.262910842895508, "learning_rate": 5e-06, "loss": 0.6218, "num_input_tokens_seen": 115631376, "step": 1845 }, { "epoch": 6.139767054908486, "loss": 0.6551563143730164, "loss_ce": 4.9998088798020035e-06, "loss_iou": 0.2314453125, "loss_num": 0.038818359375, "loss_xval": 0.65625, "num_input_tokens_seen": 115631376, "step": 1845 }, { "epoch": 6.143094841930116, "grad_norm": 16.876794815063477, "learning_rate": 5e-06, "loss": 0.5707, "num_input_tokens_seen": 115694676, "step": 1846 }, { "epoch": 6.143094841930116, "loss": 0.5590164661407471, "loss_ce": 0.002131724264472723, "loss_iou": 0.1826171875, "loss_num": 0.038330078125, "loss_xval": 0.55859375, "num_input_tokens_seen": 115694676, "step": 1846 }, { "epoch": 6.146422628951747, "grad_norm": 13.508707046508789, "learning_rate": 5e-06, "loss": 0.4522, "num_input_tokens_seen": 115757384, "step": 1847 }, { "epoch": 6.146422628951747, "loss": 0.39521026611328125, "loss_ce": 0.00019072243594564497, "loss_iou": 0.119140625, "loss_num": 0.031494140625, "loss_xval": 0.39453125, "num_input_tokens_seen": 115757384, "step": 1847 }, { "epoch": 6.149750415973378, "grad_norm": 24.865703582763672, "learning_rate": 5e-06, "loss": 0.6482, "num_input_tokens_seen": 115820368, "step": 1848 }, { "epoch": 6.149750415973378, "loss": 0.8225171566009521, "loss_ce": 7.408522833429743e-06, "loss_iou": 0.28515625, "loss_num": 0.05078125, "loss_xval": 0.82421875, "num_input_tokens_seen": 115820368, "step": 1848 }, { "epoch": 6.153078202995008, "grad_norm": 10.617776870727539, "learning_rate": 5e-06, "loss": 0.7869, "num_input_tokens_seen": 115883888, "step": 1849 }, { "epoch": 6.153078202995008, "loss": 0.7970017194747925, "loss_ce": 4.6364102672669105e-06, "loss_iou": 0.28125, "loss_num": 0.047119140625, "loss_xval": 0.796875, "num_input_tokens_seen": 115883888, "step": 1849 }, { "epoch": 6.156405990016639, "grad_norm": 13.21096134185791, "learning_rate": 5e-06, "loss": 0.6439, "num_input_tokens_seen": 115947024, "step": 1850 }, { "epoch": 6.156405990016639, "loss": 0.5439035892486572, "loss_ce": 0.0003855030226986855, "loss_iou": 0.1728515625, "loss_num": 0.039306640625, "loss_xval": 0.54296875, "num_input_tokens_seen": 115947024, "step": 1850 }, { "epoch": 6.159733777038269, "grad_norm": 12.673142433166504, "learning_rate": 5e-06, "loss": 0.591, "num_input_tokens_seen": 116008540, "step": 1851 }, { "epoch": 6.159733777038269, "loss": 0.5002042651176453, "loss_ce": 0.0006009893259033561, "loss_iou": 0.13671875, "loss_num": 0.045166015625, "loss_xval": 0.5, "num_input_tokens_seen": 116008540, "step": 1851 }, { "epoch": 6.1630615640599, "grad_norm": 12.410579681396484, "learning_rate": 5e-06, "loss": 0.5276, "num_input_tokens_seen": 116069856, "step": 1852 }, { "epoch": 6.1630615640599, "loss": 0.6053777933120728, "loss_ce": 0.00015313336916733533, "loss_iou": 0.203125, "loss_num": 0.03955078125, "loss_xval": 0.60546875, "num_input_tokens_seen": 116069856, "step": 1852 }, { "epoch": 6.166389351081531, "grad_norm": 10.24417781829834, "learning_rate": 5e-06, "loss": 0.6968, "num_input_tokens_seen": 116132820, "step": 1853 }, { "epoch": 6.166389351081531, "loss": 0.45372575521469116, "loss_ce": 0.0004481864161789417, "loss_iou": 0.1640625, "loss_num": 0.02490234375, "loss_xval": 0.453125, "num_input_tokens_seen": 116132820, "step": 1853 }, { "epoch": 6.169717138103161, "grad_norm": 19.29709243774414, "learning_rate": 5e-06, "loss": 0.8242, "num_input_tokens_seen": 116197008, "step": 1854 }, { "epoch": 6.169717138103161, "loss": 0.7781339287757874, "loss_ce": 0.0007902136421762407, "loss_iou": 0.302734375, "loss_num": 0.03466796875, "loss_xval": 0.77734375, "num_input_tokens_seen": 116197008, "step": 1854 }, { "epoch": 6.173044925124792, "grad_norm": 16.979761123657227, "learning_rate": 5e-06, "loss": 0.5969, "num_input_tokens_seen": 116260052, "step": 1855 }, { "epoch": 6.173044925124792, "loss": 0.5408188104629517, "loss_ce": 4.733385139843449e-05, "loss_iou": 0.1953125, "loss_num": 0.0301513671875, "loss_xval": 0.5390625, "num_input_tokens_seen": 116260052, "step": 1855 }, { "epoch": 6.1763727121464225, "grad_norm": 25.549842834472656, "learning_rate": 5e-06, "loss": 0.8045, "num_input_tokens_seen": 116322916, "step": 1856 }, { "epoch": 6.1763727121464225, "loss": 0.9726593494415283, "loss_ce": 0.0002472798223607242, "loss_iou": 0.3671875, "loss_num": 0.047607421875, "loss_xval": 0.97265625, "num_input_tokens_seen": 116322916, "step": 1856 }, { "epoch": 6.179700499168053, "grad_norm": 11.069473266601562, "learning_rate": 5e-06, "loss": 0.6182, "num_input_tokens_seen": 116386300, "step": 1857 }, { "epoch": 6.179700499168053, "loss": 0.8058730363845825, "loss_ce": 0.0003310354077257216, "loss_iou": 0.28515625, "loss_num": 0.046875, "loss_xval": 0.8046875, "num_input_tokens_seen": 116386300, "step": 1857 }, { "epoch": 6.183028286189684, "grad_norm": 11.940938949584961, "learning_rate": 5e-06, "loss": 0.6844, "num_input_tokens_seen": 116448856, "step": 1858 }, { "epoch": 6.183028286189684, "loss": 0.8730907440185547, "loss_ce": 0.0008984009618870914, "loss_iou": 0.322265625, "loss_num": 0.045654296875, "loss_xval": 0.87109375, "num_input_tokens_seen": 116448856, "step": 1858 }, { "epoch": 6.186356073211314, "grad_norm": 26.180950164794922, "learning_rate": 5e-06, "loss": 0.6785, "num_input_tokens_seen": 116513172, "step": 1859 }, { "epoch": 6.186356073211314, "loss": 0.6941823959350586, "loss_ce": 0.0003347777819726616, "loss_iou": 0.298828125, "loss_num": 0.01953125, "loss_xval": 0.6953125, "num_input_tokens_seen": 116513172, "step": 1859 }, { "epoch": 6.189683860232945, "grad_norm": 38.279239654541016, "learning_rate": 5e-06, "loss": 0.5853, "num_input_tokens_seen": 116575568, "step": 1860 }, { "epoch": 6.189683860232945, "loss": 0.6565850973129272, "loss_ce": 0.0003350722254253924, "loss_iou": 0.220703125, "loss_num": 0.043212890625, "loss_xval": 0.65625, "num_input_tokens_seen": 116575568, "step": 1860 }, { "epoch": 6.1930116472545755, "grad_norm": 23.869171142578125, "learning_rate": 5e-06, "loss": 0.5414, "num_input_tokens_seen": 116638880, "step": 1861 }, { "epoch": 6.1930116472545755, "loss": 0.48156753182411194, "loss_ce": 0.0002443046832922846, "loss_iou": 0.1630859375, "loss_num": 0.03076171875, "loss_xval": 0.48046875, "num_input_tokens_seen": 116638880, "step": 1861 }, { "epoch": 6.196339434276206, "grad_norm": 36.01377487182617, "learning_rate": 5e-06, "loss": 0.6001, "num_input_tokens_seen": 116702460, "step": 1862 }, { "epoch": 6.196339434276206, "loss": 0.5464353561401367, "loss_ce": 0.0007810404058545828, "loss_iou": 0.16796875, "loss_num": 0.0419921875, "loss_xval": 0.546875, "num_input_tokens_seen": 116702460, "step": 1862 }, { "epoch": 6.199667221297837, "grad_norm": 21.87510108947754, "learning_rate": 5e-06, "loss": 0.6532, "num_input_tokens_seen": 116765876, "step": 1863 }, { "epoch": 6.199667221297837, "loss": 0.814652144908905, "loss_ce": 0.00019903229258488864, "loss_iou": 0.28515625, "loss_num": 0.048583984375, "loss_xval": 0.8125, "num_input_tokens_seen": 116765876, "step": 1863 }, { "epoch": 6.202995008319467, "grad_norm": 7.908597946166992, "learning_rate": 5e-06, "loss": 0.5938, "num_input_tokens_seen": 116828836, "step": 1864 }, { "epoch": 6.202995008319467, "loss": 0.7178022265434265, "loss_ce": 0.000639147125184536, "loss_iou": 0.25390625, "loss_num": 0.042236328125, "loss_xval": 0.71875, "num_input_tokens_seen": 116828836, "step": 1864 }, { "epoch": 6.206322795341098, "grad_norm": 11.291393280029297, "learning_rate": 5e-06, "loss": 0.5858, "num_input_tokens_seen": 116890788, "step": 1865 }, { "epoch": 6.206322795341098, "loss": 0.7302819490432739, "loss_ce": 0.0004235214146319777, "loss_iou": 0.2578125, "loss_num": 0.04248046875, "loss_xval": 0.73046875, "num_input_tokens_seen": 116890788, "step": 1865 }, { "epoch": 6.209650582362729, "grad_norm": 27.98455238342285, "learning_rate": 5e-06, "loss": 0.6189, "num_input_tokens_seen": 116953012, "step": 1866 }, { "epoch": 6.209650582362729, "loss": 0.7605868577957153, "loss_ce": 8.884212729753926e-05, "loss_iou": 0.275390625, "loss_num": 0.041748046875, "loss_xval": 0.76171875, "num_input_tokens_seen": 116953012, "step": 1866 }, { "epoch": 6.212978369384359, "grad_norm": 37.32789611816406, "learning_rate": 5e-06, "loss": 0.6325, "num_input_tokens_seen": 117016728, "step": 1867 }, { "epoch": 6.212978369384359, "loss": 0.5575933456420898, "loss_ce": 0.0004644446889869869, "loss_iou": 0.1875, "loss_num": 0.036376953125, "loss_xval": 0.55859375, "num_input_tokens_seen": 117016728, "step": 1867 }, { "epoch": 6.21630615640599, "grad_norm": 8.77429485321045, "learning_rate": 5e-06, "loss": 0.7978, "num_input_tokens_seen": 117079776, "step": 1868 }, { "epoch": 6.21630615640599, "loss": 0.5884289741516113, "loss_ce": 0.0005383165553212166, "loss_iou": 0.2197265625, "loss_num": 0.029541015625, "loss_xval": 0.5859375, "num_input_tokens_seen": 117079776, "step": 1868 }, { "epoch": 6.21963394342762, "grad_norm": 13.149292945861816, "learning_rate": 5e-06, "loss": 0.4875, "num_input_tokens_seen": 117142040, "step": 1869 }, { "epoch": 6.21963394342762, "loss": 0.41048645973205566, "loss_ce": 2.5050567273865454e-05, "loss_iou": 0.140625, "loss_num": 0.0260009765625, "loss_xval": 0.41015625, "num_input_tokens_seen": 117142040, "step": 1869 }, { "epoch": 6.222961730449251, "grad_norm": 22.748687744140625, "learning_rate": 5e-06, "loss": 0.6843, "num_input_tokens_seen": 117204720, "step": 1870 }, { "epoch": 6.222961730449251, "loss": 0.6726725101470947, "loss_ce": 6.513913831440732e-05, "loss_iou": 0.228515625, "loss_num": 0.04296875, "loss_xval": 0.671875, "num_input_tokens_seen": 117204720, "step": 1870 }, { "epoch": 6.226289517470882, "grad_norm": 25.107208251953125, "learning_rate": 5e-06, "loss": 0.5835, "num_input_tokens_seen": 117268404, "step": 1871 }, { "epoch": 6.226289517470882, "loss": 0.5766459703445435, "loss_ce": 0.0013285500463098288, "loss_iou": 0.1845703125, "loss_num": 0.041259765625, "loss_xval": 0.57421875, "num_input_tokens_seen": 117268404, "step": 1871 }, { "epoch": 6.229617304492512, "grad_norm": 15.994500160217285, "learning_rate": 5e-06, "loss": 0.6522, "num_input_tokens_seen": 117331168, "step": 1872 }, { "epoch": 6.229617304492512, "loss": 0.7275434732437134, "loss_ce": 4.387545232020784e-06, "loss_iou": 0.2373046875, "loss_num": 0.05029296875, "loss_xval": 0.7265625, "num_input_tokens_seen": 117331168, "step": 1872 }, { "epoch": 6.232945091514143, "grad_norm": 14.865208625793457, "learning_rate": 5e-06, "loss": 0.4913, "num_input_tokens_seen": 117394412, "step": 1873 }, { "epoch": 6.232945091514143, "loss": 0.4301111400127411, "loss_ce": 5.7414017646806315e-05, "loss_iou": 0.11962890625, "loss_num": 0.0380859375, "loss_xval": 0.4296875, "num_input_tokens_seen": 117394412, "step": 1873 }, { "epoch": 6.2362728785357735, "grad_norm": 9.97636890411377, "learning_rate": 5e-06, "loss": 0.6502, "num_input_tokens_seen": 117457176, "step": 1874 }, { "epoch": 6.2362728785357735, "loss": 0.6446551084518433, "loss_ce": 0.0006731529720127583, "loss_iou": 0.212890625, "loss_num": 0.043701171875, "loss_xval": 0.64453125, "num_input_tokens_seen": 117457176, "step": 1874 }, { "epoch": 6.239600665557404, "grad_norm": 8.297738075256348, "learning_rate": 5e-06, "loss": 0.5623, "num_input_tokens_seen": 117518696, "step": 1875 }, { "epoch": 6.239600665557404, "loss": 0.5319926142692566, "loss_ce": 0.00019328358757775277, "loss_iou": 0.181640625, "loss_num": 0.03369140625, "loss_xval": 0.53125, "num_input_tokens_seen": 117518696, "step": 1875 }, { "epoch": 6.242928452579035, "grad_norm": 7.489507675170898, "learning_rate": 5e-06, "loss": 0.5349, "num_input_tokens_seen": 117581024, "step": 1876 }, { "epoch": 6.242928452579035, "loss": 0.401066392660141, "loss_ce": 4.387100489111617e-06, "loss_iou": 0.1484375, "loss_num": 0.020751953125, "loss_xval": 0.400390625, "num_input_tokens_seen": 117581024, "step": 1876 }, { "epoch": 6.246256239600665, "grad_norm": 19.296171188354492, "learning_rate": 5e-06, "loss": 0.6984, "num_input_tokens_seen": 117646404, "step": 1877 }, { "epoch": 6.246256239600665, "loss": 0.7436515092849731, "loss_ce": 0.0007315932889468968, "loss_iou": 0.2890625, "loss_num": 0.033447265625, "loss_xval": 0.7421875, "num_input_tokens_seen": 117646404, "step": 1877 }, { "epoch": 6.249584026622296, "grad_norm": 29.02984046936035, "learning_rate": 5e-06, "loss": 0.6827, "num_input_tokens_seen": 117709528, "step": 1878 }, { "epoch": 6.249584026622296, "loss": 0.7689893245697021, "loss_ce": 7.337753686442738e-06, "loss_iou": 0.296875, "loss_num": 0.035400390625, "loss_xval": 0.76953125, "num_input_tokens_seen": 117709528, "step": 1878 }, { "epoch": 6.252911813643927, "grad_norm": 45.192562103271484, "learning_rate": 5e-06, "loss": 0.6448, "num_input_tokens_seen": 117772044, "step": 1879 }, { "epoch": 6.252911813643927, "loss": 0.6723182201385498, "loss_ce": 1.5984420315362513e-05, "loss_iou": 0.26171875, "loss_num": 0.029541015625, "loss_xval": 0.671875, "num_input_tokens_seen": 117772044, "step": 1879 }, { "epoch": 6.256239600665557, "grad_norm": 29.018625259399414, "learning_rate": 5e-06, "loss": 0.5516, "num_input_tokens_seen": 117834036, "step": 1880 }, { "epoch": 6.256239600665557, "loss": 0.5316672921180725, "loss_ce": 0.0001731569936964661, "loss_iou": 0.171875, "loss_num": 0.03759765625, "loss_xval": 0.53125, "num_input_tokens_seen": 117834036, "step": 1880 }, { "epoch": 6.259567387687188, "grad_norm": 23.841171264648438, "learning_rate": 5e-06, "loss": 0.7798, "num_input_tokens_seen": 117895044, "step": 1881 }, { "epoch": 6.259567387687188, "loss": 0.7895559072494507, "loss_ce": 5.09536675963318e-06, "loss_iou": 0.302734375, "loss_num": 0.037109375, "loss_xval": 0.7890625, "num_input_tokens_seen": 117895044, "step": 1881 }, { "epoch": 6.262895174708818, "grad_norm": 28.532699584960938, "learning_rate": 5e-06, "loss": 0.5149, "num_input_tokens_seen": 117956884, "step": 1882 }, { "epoch": 6.262895174708818, "loss": 0.6902905702590942, "loss_ce": 0.0007154002669267356, "loss_iou": 0.21484375, "loss_num": 0.052001953125, "loss_xval": 0.69140625, "num_input_tokens_seen": 117956884, "step": 1882 }, { "epoch": 6.266222961730449, "grad_norm": 17.87752342224121, "learning_rate": 5e-06, "loss": 0.7271, "num_input_tokens_seen": 118019364, "step": 1883 }, { "epoch": 6.266222961730449, "loss": 0.7441762089729309, "loss_ce": 3.5563556593842804e-05, "loss_iou": 0.28125, "loss_num": 0.0361328125, "loss_xval": 0.7421875, "num_input_tokens_seen": 118019364, "step": 1883 }, { "epoch": 6.26955074875208, "grad_norm": 25.544570922851562, "learning_rate": 5e-06, "loss": 0.6711, "num_input_tokens_seen": 118082380, "step": 1884 }, { "epoch": 6.26955074875208, "loss": 0.7013899087905884, "loss_ce": 0.0003401026770006865, "loss_iou": 0.2734375, "loss_num": 0.030517578125, "loss_xval": 0.69921875, "num_input_tokens_seen": 118082380, "step": 1884 }, { "epoch": 6.27287853577371, "grad_norm": 21.333023071289062, "learning_rate": 5e-06, "loss": 0.6432, "num_input_tokens_seen": 118145304, "step": 1885 }, { "epoch": 6.27287853577371, "loss": 0.3440302908420563, "loss_ce": 0.00015822664136067033, "loss_iou": 0.12109375, "loss_num": 0.0203857421875, "loss_xval": 0.34375, "num_input_tokens_seen": 118145304, "step": 1885 }, { "epoch": 6.276206322795341, "grad_norm": 11.410968780517578, "learning_rate": 5e-06, "loss": 0.8493, "num_input_tokens_seen": 118205404, "step": 1886 }, { "epoch": 6.276206322795341, "loss": 0.6452671885490417, "loss_ce": 3.5297755403007613e-06, "loss_iou": 0.2080078125, "loss_num": 0.045654296875, "loss_xval": 0.64453125, "num_input_tokens_seen": 118205404, "step": 1886 }, { "epoch": 6.2795341098169715, "grad_norm": 15.16508674621582, "learning_rate": 5e-06, "loss": 0.746, "num_input_tokens_seen": 118268188, "step": 1887 }, { "epoch": 6.2795341098169715, "loss": 0.8573170900344849, "loss_ce": 0.00013937031326349825, "loss_iou": 0.2890625, "loss_num": 0.055419921875, "loss_xval": 0.85546875, "num_input_tokens_seen": 118268188, "step": 1887 }, { "epoch": 6.282861896838602, "grad_norm": 19.193706512451172, "learning_rate": 5e-06, "loss": 0.5648, "num_input_tokens_seen": 118331420, "step": 1888 }, { "epoch": 6.282861896838602, "loss": 0.5550625324249268, "loss_ce": 0.00037507241358980536, "loss_iou": 0.228515625, "loss_num": 0.0194091796875, "loss_xval": 0.5546875, "num_input_tokens_seen": 118331420, "step": 1888 }, { "epoch": 6.286189683860233, "grad_norm": 10.598845481872559, "learning_rate": 5e-06, "loss": 0.4831, "num_input_tokens_seen": 118394204, "step": 1889 }, { "epoch": 6.286189683860233, "loss": 0.45692452788352966, "loss_ce": 0.00047309871297329664, "loss_iou": 0.14453125, "loss_num": 0.033447265625, "loss_xval": 0.45703125, "num_input_tokens_seen": 118394204, "step": 1889 }, { "epoch": 6.289517470881863, "grad_norm": 18.339231491088867, "learning_rate": 5e-06, "loss": 0.3393, "num_input_tokens_seen": 118456836, "step": 1890 }, { "epoch": 6.289517470881863, "loss": 0.35476088523864746, "loss_ce": 2.4531174858566374e-05, "loss_iou": 0.125, "loss_num": 0.0208740234375, "loss_xval": 0.35546875, "num_input_tokens_seen": 118456836, "step": 1890 }, { "epoch": 6.292845257903494, "grad_norm": 42.98812484741211, "learning_rate": 5e-06, "loss": 0.9685, "num_input_tokens_seen": 118520496, "step": 1891 }, { "epoch": 6.292845257903494, "loss": 0.9763355255126953, "loss_ce": 0.0005359815550036728, "loss_iou": 0.3203125, "loss_num": 0.0673828125, "loss_xval": 0.9765625, "num_input_tokens_seen": 118520496, "step": 1891 }, { "epoch": 6.2961730449251245, "grad_norm": 7.72786283493042, "learning_rate": 5e-06, "loss": 0.5755, "num_input_tokens_seen": 118583420, "step": 1892 }, { "epoch": 6.2961730449251245, "loss": 0.615013599395752, "loss_ce": 2.342194420634769e-05, "loss_iou": 0.208984375, "loss_num": 0.03955078125, "loss_xval": 0.61328125, "num_input_tokens_seen": 118583420, "step": 1892 }, { "epoch": 6.299500831946755, "grad_norm": 7.7686448097229, "learning_rate": 5e-06, "loss": 0.5277, "num_input_tokens_seen": 118644200, "step": 1893 }, { "epoch": 6.299500831946755, "loss": 0.6303321123123169, "loss_ce": 8.307768439408392e-05, "loss_iou": 0.1845703125, "loss_num": 0.052490234375, "loss_xval": 0.62890625, "num_input_tokens_seen": 118644200, "step": 1893 }, { "epoch": 6.302828618968386, "grad_norm": 14.28267765045166, "learning_rate": 5e-06, "loss": 0.6133, "num_input_tokens_seen": 118706948, "step": 1894 }, { "epoch": 6.302828618968386, "loss": 0.6571691036224365, "loss_ce": 0.00036982051096856594, "loss_iou": 0.220703125, "loss_num": 0.04296875, "loss_xval": 0.65625, "num_input_tokens_seen": 118706948, "step": 1894 }, { "epoch": 6.306156405990016, "grad_norm": 14.473723411560059, "learning_rate": 5e-06, "loss": 0.7662, "num_input_tokens_seen": 118770088, "step": 1895 }, { "epoch": 6.306156405990016, "loss": 0.6408983469009399, "loss_ce": 2.9183007427491248e-05, "loss_iou": 0.19140625, "loss_num": 0.052001953125, "loss_xval": 0.640625, "num_input_tokens_seen": 118770088, "step": 1895 }, { "epoch": 6.309484193011647, "grad_norm": 11.187668800354004, "learning_rate": 5e-06, "loss": 0.8763, "num_input_tokens_seen": 118833268, "step": 1896 }, { "epoch": 6.309484193011647, "loss": 0.8932377099990845, "loss_ce": 0.00029332979465834796, "loss_iou": 0.322265625, "loss_num": 0.04931640625, "loss_xval": 0.89453125, "num_input_tokens_seen": 118833268, "step": 1896 }, { "epoch": 6.312811980033278, "grad_norm": 20.483861923217773, "learning_rate": 5e-06, "loss": 0.62, "num_input_tokens_seen": 118895864, "step": 1897 }, { "epoch": 6.312811980033278, "loss": 0.6993443965911865, "loss_ce": 0.0007360352901741862, "loss_iou": 0.224609375, "loss_num": 0.0498046875, "loss_xval": 0.69921875, "num_input_tokens_seen": 118895864, "step": 1897 }, { "epoch": 6.316139767054908, "grad_norm": 8.918088912963867, "learning_rate": 5e-06, "loss": 0.5872, "num_input_tokens_seen": 118958404, "step": 1898 }, { "epoch": 6.316139767054908, "loss": 0.6274863481521606, "loss_ce": 0.0011435841443017125, "loss_iou": 0.1982421875, "loss_num": 0.046142578125, "loss_xval": 0.625, "num_input_tokens_seen": 118958404, "step": 1898 }, { "epoch": 6.319467554076539, "grad_norm": 13.76313591003418, "learning_rate": 5e-06, "loss": 0.7277, "num_input_tokens_seen": 119021880, "step": 1899 }, { "epoch": 6.319467554076539, "loss": 0.6522097587585449, "loss_ce": 0.0009646408725529909, "loss_iou": 0.2255859375, "loss_num": 0.0400390625, "loss_xval": 0.65234375, "num_input_tokens_seen": 119021880, "step": 1899 }, { "epoch": 6.322795341098169, "grad_norm": 8.077730178833008, "learning_rate": 5e-06, "loss": 0.5365, "num_input_tokens_seen": 119085200, "step": 1900 }, { "epoch": 6.322795341098169, "loss": 0.3900187015533447, "loss_ce": 4.0382278712058906e-06, "loss_iou": 0.11328125, "loss_num": 0.03271484375, "loss_xval": 0.390625, "num_input_tokens_seen": 119085200, "step": 1900 }, { "epoch": 6.3261231281198, "grad_norm": 20.717100143432617, "learning_rate": 5e-06, "loss": 0.662, "num_input_tokens_seen": 119149340, "step": 1901 }, { "epoch": 6.3261231281198, "loss": 0.5592089891433716, "loss_ce": 0.0011035435600206256, "loss_iou": 0.2021484375, "loss_num": 0.0308837890625, "loss_xval": 0.55859375, "num_input_tokens_seen": 119149340, "step": 1901 }, { "epoch": 6.329450915141431, "grad_norm": 11.843432426452637, "learning_rate": 5e-06, "loss": 0.5203, "num_input_tokens_seen": 119209956, "step": 1902 }, { "epoch": 6.329450915141431, "loss": 0.6044921875, "loss_ce": 0.0002441465330775827, "loss_iou": 0.23046875, "loss_num": 0.02880859375, "loss_xval": 0.60546875, "num_input_tokens_seen": 119209956, "step": 1902 }, { "epoch": 6.332778702163061, "grad_norm": 11.223464012145996, "learning_rate": 5e-06, "loss": 0.6212, "num_input_tokens_seen": 119269532, "step": 1903 }, { "epoch": 6.332778702163061, "loss": 0.6058506369590759, "loss_ce": 1.5660873032175004e-05, "loss_iou": 0.166015625, "loss_num": 0.0546875, "loss_xval": 0.60546875, "num_input_tokens_seen": 119269532, "step": 1903 }, { "epoch": 6.336106489184692, "grad_norm": 27.761043548583984, "learning_rate": 5e-06, "loss": 0.8632, "num_input_tokens_seen": 119333156, "step": 1904 }, { "epoch": 6.336106489184692, "loss": 0.6953175067901611, "loss_ce": 5.025212885811925e-06, "loss_iou": 0.2431640625, "loss_num": 0.0419921875, "loss_xval": 0.6953125, "num_input_tokens_seen": 119333156, "step": 1904 }, { "epoch": 6.3394342762063225, "grad_norm": 22.749303817749023, "learning_rate": 5e-06, "loss": 0.5647, "num_input_tokens_seen": 119394532, "step": 1905 }, { "epoch": 6.3394342762063225, "loss": 0.5662988424301147, "loss_ce": 0.001113282167352736, "loss_iou": 0.203125, "loss_num": 0.03173828125, "loss_xval": 0.56640625, "num_input_tokens_seen": 119394532, "step": 1905 }, { "epoch": 6.342762063227953, "grad_norm": 11.654192924499512, "learning_rate": 5e-06, "loss": 0.6636, "num_input_tokens_seen": 119457736, "step": 1906 }, { "epoch": 6.342762063227953, "loss": 0.8046139478683472, "loss_ce": 0.0006588406395167112, "loss_iou": 0.28515625, "loss_num": 0.04638671875, "loss_xval": 0.8046875, "num_input_tokens_seen": 119457736, "step": 1906 }, { "epoch": 6.346089850249584, "grad_norm": 25.141996383666992, "learning_rate": 5e-06, "loss": 0.6719, "num_input_tokens_seen": 119521436, "step": 1907 }, { "epoch": 6.346089850249584, "loss": 0.4867073893547058, "loss_ce": 1.305484965996584e-05, "loss_iou": 0.1826171875, "loss_num": 0.0244140625, "loss_xval": 0.486328125, "num_input_tokens_seen": 119521436, "step": 1907 }, { "epoch": 6.349417637271214, "grad_norm": 8.515308380126953, "learning_rate": 5e-06, "loss": 0.5758, "num_input_tokens_seen": 119583924, "step": 1908 }, { "epoch": 6.349417637271214, "loss": 0.5766646862030029, "loss_ce": 4.5053284338791855e-06, "loss_iou": 0.23828125, "loss_num": 0.02001953125, "loss_xval": 0.578125, "num_input_tokens_seen": 119583924, "step": 1908 }, { "epoch": 6.352745424292845, "grad_norm": 25.861650466918945, "learning_rate": 5e-06, "loss": 0.5609, "num_input_tokens_seen": 119644324, "step": 1909 }, { "epoch": 6.352745424292845, "loss": 0.5296666026115417, "loss_ce": 3.480890427454142e-06, "loss_iou": 0.201171875, "loss_num": 0.025390625, "loss_xval": 0.53125, "num_input_tokens_seen": 119644324, "step": 1909 }, { "epoch": 6.356073211314476, "grad_norm": 17.014137268066406, "learning_rate": 5e-06, "loss": 0.4881, "num_input_tokens_seen": 119707332, "step": 1910 }, { "epoch": 6.356073211314476, "loss": 0.32322195172309875, "loss_ce": 0.00010183690028497949, "loss_iou": 0.1259765625, "loss_num": 0.01434326171875, "loss_xval": 0.322265625, "num_input_tokens_seen": 119707332, "step": 1910 }, { "epoch": 6.359400998336106, "grad_norm": 42.42847442626953, "learning_rate": 5e-06, "loss": 0.6808, "num_input_tokens_seen": 119770608, "step": 1911 }, { "epoch": 6.359400998336106, "loss": 0.4963526129722595, "loss_ce": 1.4706164620292839e-05, "loss_iou": 0.14453125, "loss_num": 0.041748046875, "loss_xval": 0.49609375, "num_input_tokens_seen": 119770608, "step": 1911 }, { "epoch": 6.362728785357737, "grad_norm": 90.03009033203125, "learning_rate": 5e-06, "loss": 0.6118, "num_input_tokens_seen": 119832564, "step": 1912 }, { "epoch": 6.362728785357737, "loss": 0.5688121914863586, "loss_ce": 1.0294821549905464e-05, "loss_iou": 0.208984375, "loss_num": 0.030029296875, "loss_xval": 0.5703125, "num_input_tokens_seen": 119832564, "step": 1912 }, { "epoch": 6.366056572379367, "grad_norm": 21.129972457885742, "learning_rate": 5e-06, "loss": 0.7614, "num_input_tokens_seen": 119895428, "step": 1913 }, { "epoch": 6.366056572379367, "loss": 0.6494866609573364, "loss_ce": 0.0001946888369275257, "loss_iou": 0.203125, "loss_num": 0.048828125, "loss_xval": 0.6484375, "num_input_tokens_seen": 119895428, "step": 1913 }, { "epoch": 6.369384359400998, "grad_norm": 23.620243072509766, "learning_rate": 5e-06, "loss": 0.8151, "num_input_tokens_seen": 119958696, "step": 1914 }, { "epoch": 6.369384359400998, "loss": 0.8362605571746826, "loss_ce": 7.890413689892739e-05, "loss_iou": 0.30078125, "loss_num": 0.047119140625, "loss_xval": 0.8359375, "num_input_tokens_seen": 119958696, "step": 1914 }, { "epoch": 6.372712146422629, "grad_norm": 16.94724464416504, "learning_rate": 5e-06, "loss": 0.552, "num_input_tokens_seen": 120021096, "step": 1915 }, { "epoch": 6.372712146422629, "loss": 0.6032967567443848, "loss_ce": 0.0002694588038139045, "loss_iou": 0.2392578125, "loss_num": 0.0247802734375, "loss_xval": 0.6015625, "num_input_tokens_seen": 120021096, "step": 1915 }, { "epoch": 6.376039933444259, "grad_norm": 15.850129127502441, "learning_rate": 5e-06, "loss": 0.5145, "num_input_tokens_seen": 120081864, "step": 1916 }, { "epoch": 6.376039933444259, "loss": 0.4399433135986328, "loss_ce": 1.9184694792784285e-06, "loss_iou": 0.140625, "loss_num": 0.03173828125, "loss_xval": 0.439453125, "num_input_tokens_seen": 120081864, "step": 1916 }, { "epoch": 6.37936772046589, "grad_norm": 10.974051475524902, "learning_rate": 5e-06, "loss": 0.7344, "num_input_tokens_seen": 120145864, "step": 1917 }, { "epoch": 6.37936772046589, "loss": 0.6459264755249023, "loss_ce": 0.0019140413496643305, "loss_iou": 0.2490234375, "loss_num": 0.029052734375, "loss_xval": 0.64453125, "num_input_tokens_seen": 120145864, "step": 1917 }, { "epoch": 6.3826955074875205, "grad_norm": 22.09915542602539, "learning_rate": 5e-06, "loss": 0.5428, "num_input_tokens_seen": 120208220, "step": 1918 }, { "epoch": 6.3826955074875205, "loss": 0.6734851598739624, "loss_ce": 0.0007557276985608041, "loss_iou": 0.1611328125, "loss_num": 0.0703125, "loss_xval": 0.671875, "num_input_tokens_seen": 120208220, "step": 1918 }, { "epoch": 6.386023294509151, "grad_norm": 17.634904861450195, "learning_rate": 5e-06, "loss": 0.7517, "num_input_tokens_seen": 120269832, "step": 1919 }, { "epoch": 6.386023294509151, "loss": 0.6596292853355408, "loss_ce": 8.335949678439647e-05, "loss_iou": 0.2412109375, "loss_num": 0.035400390625, "loss_xval": 0.66015625, "num_input_tokens_seen": 120269832, "step": 1919 }, { "epoch": 6.389351081530782, "grad_norm": 16.73068618774414, "learning_rate": 5e-06, "loss": 0.4661, "num_input_tokens_seen": 120329824, "step": 1920 }, { "epoch": 6.389351081530782, "loss": 0.5087909698486328, "loss_ce": 1.963873046406661e-06, "loss_iou": 0.13671875, "loss_num": 0.046875, "loss_xval": 0.5078125, "num_input_tokens_seen": 120329824, "step": 1920 }, { "epoch": 6.392678868552412, "grad_norm": 26.34819984436035, "learning_rate": 5e-06, "loss": 0.6769, "num_input_tokens_seen": 120392212, "step": 1921 }, { "epoch": 6.392678868552412, "loss": 0.42969781160354614, "loss_ce": 0.000803784467279911, "loss_iou": 0.1328125, "loss_num": 0.032958984375, "loss_xval": 0.4296875, "num_input_tokens_seen": 120392212, "step": 1921 }, { "epoch": 6.396006655574043, "grad_norm": 6.768531799316406, "learning_rate": 5e-06, "loss": 0.6683, "num_input_tokens_seen": 120454584, "step": 1922 }, { "epoch": 6.396006655574043, "loss": 0.9003890752792358, "loss_ce": 0.00024259740894194692, "loss_iou": 0.318359375, "loss_num": 0.052734375, "loss_xval": 0.8984375, "num_input_tokens_seen": 120454584, "step": 1922 }, { "epoch": 6.3993344425956735, "grad_norm": 8.30113697052002, "learning_rate": 5e-06, "loss": 0.6405, "num_input_tokens_seen": 120514044, "step": 1923 }, { "epoch": 6.3993344425956735, "loss": 0.7188661098480225, "loss_ce": 0.00011609155626501888, "loss_iou": 0.1796875, "loss_num": 0.07177734375, "loss_xval": 0.71875, "num_input_tokens_seen": 120514044, "step": 1923 }, { "epoch": 6.402662229617304, "grad_norm": 15.520536422729492, "learning_rate": 5e-06, "loss": 0.7244, "num_input_tokens_seen": 120576424, "step": 1924 }, { "epoch": 6.402662229617304, "loss": 0.5446481108665466, "loss_ce": 0.00033657567109912634, "loss_iou": 0.1962890625, "loss_num": 0.0303955078125, "loss_xval": 0.54296875, "num_input_tokens_seen": 120576424, "step": 1924 }, { "epoch": 6.405990016638935, "grad_norm": 12.706613540649414, "learning_rate": 5e-06, "loss": 0.5809, "num_input_tokens_seen": 120638852, "step": 1925 }, { "epoch": 6.405990016638935, "loss": 0.6418596506118774, "loss_ce": 0.0006242538802325726, "loss_iou": 0.224609375, "loss_num": 0.03857421875, "loss_xval": 0.640625, "num_input_tokens_seen": 120638852, "step": 1925 }, { "epoch": 6.409317803660565, "grad_norm": 12.72490119934082, "learning_rate": 5e-06, "loss": 0.5454, "num_input_tokens_seen": 120701328, "step": 1926 }, { "epoch": 6.409317803660565, "loss": 0.5837253332138062, "loss_ce": 0.0001071855440386571, "loss_iou": 0.220703125, "loss_num": 0.02880859375, "loss_xval": 0.58203125, "num_input_tokens_seen": 120701328, "step": 1926 }, { "epoch": 6.412645590682196, "grad_norm": 15.018410682678223, "learning_rate": 5e-06, "loss": 0.5549, "num_input_tokens_seen": 120763848, "step": 1927 }, { "epoch": 6.412645590682196, "loss": 0.5490936040878296, "loss_ce": 0.0002655147691257298, "loss_iou": 0.1669921875, "loss_num": 0.043212890625, "loss_xval": 0.546875, "num_input_tokens_seen": 120763848, "step": 1927 }, { "epoch": 6.415973377703827, "grad_norm": 14.566852569580078, "learning_rate": 5e-06, "loss": 0.8448, "num_input_tokens_seen": 120826228, "step": 1928 }, { "epoch": 6.415973377703827, "loss": 1.2861429452896118, "loss_ce": 1.0124242180609144e-05, "loss_iou": 0.4453125, "loss_num": 0.07861328125, "loss_xval": 1.2890625, "num_input_tokens_seen": 120826228, "step": 1928 }, { "epoch": 6.419301164725457, "grad_norm": 11.734906196594238, "learning_rate": 5e-06, "loss": 0.584, "num_input_tokens_seen": 120887768, "step": 1929 }, { "epoch": 6.419301164725457, "loss": 0.5992169976234436, "loss_ce": 0.0011334889568388462, "loss_iou": 0.19921875, "loss_num": 0.0400390625, "loss_xval": 0.59765625, "num_input_tokens_seen": 120887768, "step": 1929 }, { "epoch": 6.422628951747088, "grad_norm": 15.214763641357422, "learning_rate": 5e-06, "loss": 0.6457, "num_input_tokens_seen": 120950624, "step": 1930 }, { "epoch": 6.422628951747088, "loss": 0.8110338449478149, "loss_ce": 0.00018182062194682658, "loss_iou": 0.279296875, "loss_num": 0.050048828125, "loss_xval": 0.8125, "num_input_tokens_seen": 120950624, "step": 1930 }, { "epoch": 6.425956738768718, "grad_norm": 28.60011863708496, "learning_rate": 5e-06, "loss": 0.6793, "num_input_tokens_seen": 121013536, "step": 1931 }, { "epoch": 6.425956738768718, "loss": 0.49274346232414246, "loss_ce": 0.0004949223366566002, "loss_iou": 0.1591796875, "loss_num": 0.034912109375, "loss_xval": 0.4921875, "num_input_tokens_seen": 121013536, "step": 1931 }, { "epoch": 6.429284525790349, "grad_norm": 12.213953971862793, "learning_rate": 5e-06, "loss": 0.7479, "num_input_tokens_seen": 121075756, "step": 1932 }, { "epoch": 6.429284525790349, "loss": 0.773937463760376, "loss_ce": 1.1653934961941559e-05, "loss_iou": 0.2255859375, "loss_num": 0.064453125, "loss_xval": 0.7734375, "num_input_tokens_seen": 121075756, "step": 1932 }, { "epoch": 6.43261231281198, "grad_norm": 15.571845054626465, "learning_rate": 5e-06, "loss": 0.6698, "num_input_tokens_seen": 121139652, "step": 1933 }, { "epoch": 6.43261231281198, "loss": 0.6814624071121216, "loss_ce": 0.0027514593675732613, "loss_iou": 0.27734375, "loss_num": 0.02490234375, "loss_xval": 0.6796875, "num_input_tokens_seen": 121139652, "step": 1933 }, { "epoch": 6.43594009983361, "grad_norm": 13.807389259338379, "learning_rate": 5e-06, "loss": 0.9253, "num_input_tokens_seen": 121203024, "step": 1934 }, { "epoch": 6.43594009983361, "loss": 1.345597505569458, "loss_ce": 0.0008709065150469542, "loss_iou": 0.466796875, "loss_num": 0.08251953125, "loss_xval": 1.34375, "num_input_tokens_seen": 121203024, "step": 1934 }, { "epoch": 6.439267886855241, "grad_norm": 12.07575511932373, "learning_rate": 5e-06, "loss": 0.5334, "num_input_tokens_seen": 121266420, "step": 1935 }, { "epoch": 6.439267886855241, "loss": 0.5801987648010254, "loss_ce": 0.00012065586633980274, "loss_iou": 0.1796875, "loss_num": 0.044189453125, "loss_xval": 0.578125, "num_input_tokens_seen": 121266420, "step": 1935 }, { "epoch": 6.4425956738768715, "grad_norm": 12.738320350646973, "learning_rate": 5e-06, "loss": 0.7129, "num_input_tokens_seen": 121330352, "step": 1936 }, { "epoch": 6.4425956738768715, "loss": 0.656618595123291, "loss_ce": 0.00012446939945220947, "loss_iou": 0.2197265625, "loss_num": 0.04345703125, "loss_xval": 0.65625, "num_input_tokens_seen": 121330352, "step": 1936 }, { "epoch": 6.445923460898502, "grad_norm": 14.938615798950195, "learning_rate": 5e-06, "loss": 0.6309, "num_input_tokens_seen": 121394128, "step": 1937 }, { "epoch": 6.445923460898502, "loss": 0.8611018657684326, "loss_ce": 0.0016048324760049582, "loss_iou": 0.30859375, "loss_num": 0.048828125, "loss_xval": 0.859375, "num_input_tokens_seen": 121394128, "step": 1937 }, { "epoch": 6.449251247920133, "grad_norm": 21.565048217773438, "learning_rate": 5e-06, "loss": 0.5475, "num_input_tokens_seen": 121457512, "step": 1938 }, { "epoch": 6.449251247920133, "loss": 0.39604151248931885, "loss_ce": 4.542524766293354e-05, "loss_iou": 0.126953125, "loss_num": 0.028564453125, "loss_xval": 0.396484375, "num_input_tokens_seen": 121457512, "step": 1938 }, { "epoch": 6.452579034941763, "grad_norm": 35.06698989868164, "learning_rate": 5e-06, "loss": 0.8538, "num_input_tokens_seen": 121520800, "step": 1939 }, { "epoch": 6.452579034941763, "loss": 0.8477818965911865, "loss_ce": 3.5593729990068823e-06, "loss_iou": 0.265625, "loss_num": 0.0634765625, "loss_xval": 0.84765625, "num_input_tokens_seen": 121520800, "step": 1939 }, { "epoch": 6.455906821963394, "grad_norm": 45.28481674194336, "learning_rate": 5e-06, "loss": 0.97, "num_input_tokens_seen": 121585016, "step": 1940 }, { "epoch": 6.455906821963394, "loss": 0.9006417989730835, "loss_ce": 7.0053438321338035e-06, "loss_iou": 0.306640625, "loss_num": 0.05810546875, "loss_xval": 0.90234375, "num_input_tokens_seen": 121585016, "step": 1940 }, { "epoch": 6.4592346089850246, "grad_norm": 39.1356086730957, "learning_rate": 5e-06, "loss": 0.6982, "num_input_tokens_seen": 121647252, "step": 1941 }, { "epoch": 6.4592346089850246, "loss": 0.6142480969429016, "loss_ce": 0.0001123529946198687, "loss_iou": 0.212890625, "loss_num": 0.03759765625, "loss_xval": 0.61328125, "num_input_tokens_seen": 121647252, "step": 1941 }, { "epoch": 6.462562396006655, "grad_norm": 37.34075164794922, "learning_rate": 5e-06, "loss": 0.6763, "num_input_tokens_seen": 121710464, "step": 1942 }, { "epoch": 6.462562396006655, "loss": 0.7514750957489014, "loss_ce": 1.022259584715357e-05, "loss_iou": 0.30078125, "loss_num": 0.0299072265625, "loss_xval": 0.75, "num_input_tokens_seen": 121710464, "step": 1942 }, { "epoch": 6.465890183028286, "grad_norm": 28.683731079101562, "learning_rate": 5e-06, "loss": 0.5492, "num_input_tokens_seen": 121771684, "step": 1943 }, { "epoch": 6.465890183028286, "loss": 0.34131908416748047, "loss_ce": 0.0007429145043715835, "loss_iou": 0.080078125, "loss_num": 0.0361328125, "loss_xval": 0.33984375, "num_input_tokens_seen": 121771684, "step": 1943 }, { "epoch": 6.469217970049916, "grad_norm": 16.91063690185547, "learning_rate": 5e-06, "loss": 0.6058, "num_input_tokens_seen": 121836040, "step": 1944 }, { "epoch": 6.469217970049916, "loss": 0.6735157370567322, "loss_ce": 0.0002979549753945321, "loss_iou": 0.28125, "loss_num": 0.0223388671875, "loss_xval": 0.671875, "num_input_tokens_seen": 121836040, "step": 1944 }, { "epoch": 6.472545757071547, "grad_norm": 20.4154109954834, "learning_rate": 5e-06, "loss": 0.6285, "num_input_tokens_seen": 121895316, "step": 1945 }, { "epoch": 6.472545757071547, "loss": 0.8378450274467468, "loss_ce": 7.650956104043871e-05, "loss_iou": 0.2470703125, "loss_num": 0.06884765625, "loss_xval": 0.8359375, "num_input_tokens_seen": 121895316, "step": 1945 }, { "epoch": 6.475873544093178, "grad_norm": 42.6540412902832, "learning_rate": 5e-06, "loss": 0.7617, "num_input_tokens_seen": 121958684, "step": 1946 }, { "epoch": 6.475873544093178, "loss": 1.069746732711792, "loss_ce": 0.0015093846013769507, "loss_iou": 0.38671875, "loss_num": 0.05859375, "loss_xval": 1.0703125, "num_input_tokens_seen": 121958684, "step": 1946 }, { "epoch": 6.479201331114808, "grad_norm": 15.946682929992676, "learning_rate": 5e-06, "loss": 0.7402, "num_input_tokens_seen": 122020744, "step": 1947 }, { "epoch": 6.479201331114808, "loss": 0.6518175601959229, "loss_ce": 0.00026731760590337217, "loss_iou": 0.154296875, "loss_num": 0.06884765625, "loss_xval": 0.65234375, "num_input_tokens_seen": 122020744, "step": 1947 }, { "epoch": 6.482529118136439, "grad_norm": 10.553583145141602, "learning_rate": 5e-06, "loss": 0.5386, "num_input_tokens_seen": 122083412, "step": 1948 }, { "epoch": 6.482529118136439, "loss": 0.7277462482452393, "loss_ce": 0.00020720537577290088, "loss_iou": 0.298828125, "loss_num": 0.0257568359375, "loss_xval": 0.7265625, "num_input_tokens_seen": 122083412, "step": 1948 }, { "epoch": 6.4858569051580695, "grad_norm": 11.314140319824219, "learning_rate": 5e-06, "loss": 0.7428, "num_input_tokens_seen": 122145288, "step": 1949 }, { "epoch": 6.4858569051580695, "loss": 0.7091948986053467, "loss_ce": 8.838271605782211e-05, "loss_iou": 0.2578125, "loss_num": 0.038818359375, "loss_xval": 0.7109375, "num_input_tokens_seen": 122145288, "step": 1949 }, { "epoch": 6.4891846921797, "grad_norm": 67.86624145507812, "learning_rate": 5e-06, "loss": 0.7234, "num_input_tokens_seen": 122208008, "step": 1950 }, { "epoch": 6.4891846921797, "loss": 0.6662644147872925, "loss_ce": 4.646051820600405e-06, "loss_iou": 0.263671875, "loss_num": 0.0277099609375, "loss_xval": 0.66796875, "num_input_tokens_seen": 122208008, "step": 1950 }, { "epoch": 6.492512479201331, "grad_norm": 15.304253578186035, "learning_rate": 5e-06, "loss": 0.5958, "num_input_tokens_seen": 122268396, "step": 1951 }, { "epoch": 6.492512479201331, "loss": 0.680047869682312, "loss_ce": 0.00011620179429883137, "loss_iou": 0.19140625, "loss_num": 0.0595703125, "loss_xval": 0.6796875, "num_input_tokens_seen": 122268396, "step": 1951 }, { "epoch": 6.495840266222961, "grad_norm": 13.426730155944824, "learning_rate": 5e-06, "loss": 0.6756, "num_input_tokens_seen": 122331456, "step": 1952 }, { "epoch": 6.495840266222961, "loss": 0.5398004055023193, "loss_ce": 5.481046173372306e-06, "loss_iou": 0.1591796875, "loss_num": 0.04443359375, "loss_xval": 0.5390625, "num_input_tokens_seen": 122331456, "step": 1952 }, { "epoch": 6.499168053244592, "grad_norm": 6.796942710876465, "learning_rate": 5e-06, "loss": 0.5479, "num_input_tokens_seen": 122395172, "step": 1953 }, { "epoch": 6.499168053244592, "loss": 0.40055206418037415, "loss_ce": 0.0006497133290395141, "loss_iou": 0.13671875, "loss_num": 0.0252685546875, "loss_xval": 0.400390625, "num_input_tokens_seen": 122395172, "step": 1953 }, { "epoch": 6.5024958402662225, "grad_norm": 8.822606086730957, "learning_rate": 5e-06, "loss": 0.6339, "num_input_tokens_seen": 122456692, "step": 1954 }, { "epoch": 6.5024958402662225, "loss": 0.6271539330482483, "loss_ce": 0.0003228959976695478, "loss_iou": 0.203125, "loss_num": 0.0439453125, "loss_xval": 0.625, "num_input_tokens_seen": 122456692, "step": 1954 }, { "epoch": 6.505823627287853, "grad_norm": 14.753844261169434, "learning_rate": 5e-06, "loss": 0.7122, "num_input_tokens_seen": 122517856, "step": 1955 }, { "epoch": 6.505823627287853, "loss": 0.7032543420791626, "loss_ce": 0.00012933007383253425, "loss_iou": 0.15234375, "loss_num": 0.080078125, "loss_xval": 0.703125, "num_input_tokens_seen": 122517856, "step": 1955 }, { "epoch": 6.509151414309484, "grad_norm": 15.91337776184082, "learning_rate": 5e-06, "loss": 0.5987, "num_input_tokens_seen": 122581004, "step": 1956 }, { "epoch": 6.509151414309484, "loss": 0.6297885179519653, "loss_ce": 0.00014987353642936796, "loss_iou": 0.236328125, "loss_num": 0.031494140625, "loss_xval": 0.62890625, "num_input_tokens_seen": 122581004, "step": 1956 }, { "epoch": 6.512479201331114, "grad_norm": 9.528816223144531, "learning_rate": 5e-06, "loss": 0.6616, "num_input_tokens_seen": 122644628, "step": 1957 }, { "epoch": 6.512479201331114, "loss": 0.6756832003593445, "loss_ce": 0.00039023166755214334, "loss_iou": 0.2314453125, "loss_num": 0.04248046875, "loss_xval": 0.67578125, "num_input_tokens_seen": 122644628, "step": 1957 }, { "epoch": 6.515806988352745, "grad_norm": 27.174596786499023, "learning_rate": 5e-06, "loss": 0.6126, "num_input_tokens_seen": 122708080, "step": 1958 }, { "epoch": 6.515806988352745, "loss": 0.6474884152412415, "loss_ce": 0.00027161132311448455, "loss_iou": 0.251953125, "loss_num": 0.028564453125, "loss_xval": 0.6484375, "num_input_tokens_seen": 122708080, "step": 1958 }, { "epoch": 6.519134775374376, "grad_norm": 19.91985321044922, "learning_rate": 5e-06, "loss": 0.5484, "num_input_tokens_seen": 122770044, "step": 1959 }, { "epoch": 6.519134775374376, "loss": 0.693800687789917, "loss_ce": 1.4089351680013351e-05, "loss_iou": 0.279296875, "loss_num": 0.0267333984375, "loss_xval": 0.6953125, "num_input_tokens_seen": 122770044, "step": 1959 }, { "epoch": 6.522462562396006, "grad_norm": 8.022729873657227, "learning_rate": 5e-06, "loss": 0.576, "num_input_tokens_seen": 122833492, "step": 1960 }, { "epoch": 6.522462562396006, "loss": 0.7717412114143372, "loss_ce": 0.00013476284220814705, "loss_iou": 0.2890625, "loss_num": 0.0390625, "loss_xval": 0.7734375, "num_input_tokens_seen": 122833492, "step": 1960 }, { "epoch": 6.525790349417637, "grad_norm": 18.420011520385742, "learning_rate": 5e-06, "loss": 0.5904, "num_input_tokens_seen": 122896032, "step": 1961 }, { "epoch": 6.525790349417637, "loss": 0.7451998591423035, "loss_ce": 2.1647940229740925e-05, "loss_iou": 0.267578125, "loss_num": 0.041748046875, "loss_xval": 0.74609375, "num_input_tokens_seen": 122896032, "step": 1961 }, { "epoch": 6.529118136439267, "grad_norm": 14.324840545654297, "learning_rate": 5e-06, "loss": 0.6027, "num_input_tokens_seen": 122957832, "step": 1962 }, { "epoch": 6.529118136439267, "loss": 0.6533394455909729, "loss_ce": 0.0006295054336078465, "loss_iou": 0.220703125, "loss_num": 0.04248046875, "loss_xval": 0.65234375, "num_input_tokens_seen": 122957832, "step": 1962 }, { "epoch": 6.532445923460898, "grad_norm": 11.380367279052734, "learning_rate": 5e-06, "loss": 0.6411, "num_input_tokens_seen": 123019992, "step": 1963 }, { "epoch": 6.532445923460898, "loss": 0.6608841419219971, "loss_ce": 0.00048379399231635034, "loss_iou": 0.224609375, "loss_num": 0.042236328125, "loss_xval": 0.66015625, "num_input_tokens_seen": 123019992, "step": 1963 }, { "epoch": 6.535773710482529, "grad_norm": 21.338104248046875, "learning_rate": 5e-06, "loss": 0.6764, "num_input_tokens_seen": 123083924, "step": 1964 }, { "epoch": 6.535773710482529, "loss": 0.6724074482917786, "loss_ce": 4.4186064769746736e-05, "loss_iou": 0.201171875, "loss_num": 0.05419921875, "loss_xval": 0.671875, "num_input_tokens_seen": 123083924, "step": 1964 }, { "epoch": 6.539101497504159, "grad_norm": 22.636884689331055, "learning_rate": 5e-06, "loss": 0.6027, "num_input_tokens_seen": 123144872, "step": 1965 }, { "epoch": 6.539101497504159, "loss": 0.6550688147544861, "loss_ce": 0.0002073537471005693, "loss_iou": 0.2158203125, "loss_num": 0.04443359375, "loss_xval": 0.65625, "num_input_tokens_seen": 123144872, "step": 1965 }, { "epoch": 6.54242928452579, "grad_norm": 56.49250411987305, "learning_rate": 5e-06, "loss": 0.5544, "num_input_tokens_seen": 123206232, "step": 1966 }, { "epoch": 6.54242928452579, "loss": 0.5780704617500305, "loss_ce": 6.4996402215911075e-06, "loss_iou": 0.126953125, "loss_num": 0.064453125, "loss_xval": 0.578125, "num_input_tokens_seen": 123206232, "step": 1966 }, { "epoch": 6.5457570715474205, "grad_norm": 10.766104698181152, "learning_rate": 5e-06, "loss": 0.4923, "num_input_tokens_seen": 123268996, "step": 1967 }, { "epoch": 6.5457570715474205, "loss": 0.5199794769287109, "loss_ce": 0.00032618455588817596, "loss_iou": 0.1640625, "loss_num": 0.038330078125, "loss_xval": 0.51953125, "num_input_tokens_seen": 123268996, "step": 1967 }, { "epoch": 6.549084858569051, "grad_norm": 11.645246505737305, "learning_rate": 5e-06, "loss": 0.653, "num_input_tokens_seen": 123334080, "step": 1968 }, { "epoch": 6.549084858569051, "loss": 0.7981342673301697, "loss_ce": 0.0012592482380568981, "loss_iou": 0.2890625, "loss_num": 0.043701171875, "loss_xval": 0.796875, "num_input_tokens_seen": 123334080, "step": 1968 }, { "epoch": 6.552412645590682, "grad_norm": 8.64057731628418, "learning_rate": 5e-06, "loss": 0.3998, "num_input_tokens_seen": 123396060, "step": 1969 }, { "epoch": 6.552412645590682, "loss": 0.4468684196472168, "loss_ce": 9.108871017815545e-05, "loss_iou": 0.1064453125, "loss_num": 0.046875, "loss_xval": 0.447265625, "num_input_tokens_seen": 123396060, "step": 1969 }, { "epoch": 6.555740432612312, "grad_norm": 24.30341339111328, "learning_rate": 5e-06, "loss": 0.7164, "num_input_tokens_seen": 123460000, "step": 1970 }, { "epoch": 6.555740432612312, "loss": 0.6704313158988953, "loss_ce": 0.0004484155506361276, "loss_iou": 0.25390625, "loss_num": 0.032470703125, "loss_xval": 0.671875, "num_input_tokens_seen": 123460000, "step": 1970 }, { "epoch": 6.559068219633943, "grad_norm": 39.814449310302734, "learning_rate": 5e-06, "loss": 0.9853, "num_input_tokens_seen": 123523824, "step": 1971 }, { "epoch": 6.559068219633943, "loss": 1.0057671070098877, "loss_ce": 0.0018607999663800001, "loss_iou": 0.365234375, "loss_num": 0.054443359375, "loss_xval": 1.0, "num_input_tokens_seen": 123523824, "step": 1971 }, { "epoch": 6.5623960066555735, "grad_norm": 27.571672439575195, "learning_rate": 5e-06, "loss": 0.7632, "num_input_tokens_seen": 123587972, "step": 1972 }, { "epoch": 6.5623960066555735, "loss": 0.6259521245956421, "loss_ce": 3.6633864510804415e-05, "loss_iou": 0.208984375, "loss_num": 0.041748046875, "loss_xval": 0.625, "num_input_tokens_seen": 123587972, "step": 1972 }, { "epoch": 6.565723793677205, "grad_norm": 34.187774658203125, "learning_rate": 5e-06, "loss": 0.7046, "num_input_tokens_seen": 123650356, "step": 1973 }, { "epoch": 6.565723793677205, "loss": 0.6025407314300537, "loss_ce": 1.6855144622240914e-06, "loss_iou": 0.203125, "loss_num": 0.03955078125, "loss_xval": 0.6015625, "num_input_tokens_seen": 123650356, "step": 1973 }, { "epoch": 6.569051580698836, "grad_norm": 20.649272918701172, "learning_rate": 5e-06, "loss": 0.6901, "num_input_tokens_seen": 123713788, "step": 1974 }, { "epoch": 6.569051580698836, "loss": 0.7274590730667114, "loss_ce": 0.0004083071544300765, "loss_iou": 0.26953125, "loss_num": 0.037353515625, "loss_xval": 0.7265625, "num_input_tokens_seen": 123713788, "step": 1974 }, { "epoch": 6.572379367720466, "grad_norm": 13.874034881591797, "learning_rate": 5e-06, "loss": 0.6211, "num_input_tokens_seen": 123777400, "step": 1975 }, { "epoch": 6.572379367720466, "loss": 0.5335060358047485, "loss_ce": 0.000791211670730263, "loss_iou": 0.1982421875, "loss_num": 0.02734375, "loss_xval": 0.53125, "num_input_tokens_seen": 123777400, "step": 1975 }, { "epoch": 6.575707154742097, "grad_norm": 9.80647087097168, "learning_rate": 5e-06, "loss": 0.6299, "num_input_tokens_seen": 123840460, "step": 1976 }, { "epoch": 6.575707154742097, "loss": 0.4524020254611969, "loss_ce": 9.460385626880452e-06, "loss_iou": 0.12890625, "loss_num": 0.038818359375, "loss_xval": 0.453125, "num_input_tokens_seen": 123840460, "step": 1976 }, { "epoch": 6.5790349417637275, "grad_norm": 29.99645233154297, "learning_rate": 5e-06, "loss": 0.6651, "num_input_tokens_seen": 123901060, "step": 1977 }, { "epoch": 6.5790349417637275, "loss": 0.8397188186645508, "loss_ce": 0.0008516378584317863, "loss_iou": 0.251953125, "loss_num": 0.06640625, "loss_xval": 0.83984375, "num_input_tokens_seen": 123901060, "step": 1977 }, { "epoch": 6.582362728785358, "grad_norm": 48.624412536621094, "learning_rate": 5e-06, "loss": 0.5813, "num_input_tokens_seen": 123962368, "step": 1978 }, { "epoch": 6.582362728785358, "loss": 0.6665137410163879, "loss_ce": 7.087117410264909e-05, "loss_iou": 0.244140625, "loss_num": 0.035888671875, "loss_xval": 0.66796875, "num_input_tokens_seen": 123962368, "step": 1978 }, { "epoch": 6.585690515806989, "grad_norm": 15.941285133361816, "learning_rate": 5e-06, "loss": 0.4458, "num_input_tokens_seen": 124024356, "step": 1979 }, { "epoch": 6.585690515806989, "loss": 0.5257315635681152, "loss_ce": 0.0004630337643902749, "loss_iou": 0.1494140625, "loss_num": 0.045166015625, "loss_xval": 0.5234375, "num_input_tokens_seen": 124024356, "step": 1979 }, { "epoch": 6.589018302828619, "grad_norm": 11.377344131469727, "learning_rate": 5e-06, "loss": 0.7924, "num_input_tokens_seen": 124087092, "step": 1980 }, { "epoch": 6.589018302828619, "loss": 0.7506057024002075, "loss_ce": 0.0005446606664918363, "loss_iou": 0.2265625, "loss_num": 0.0595703125, "loss_xval": 0.75, "num_input_tokens_seen": 124087092, "step": 1980 }, { "epoch": 6.59234608985025, "grad_norm": 22.64354133605957, "learning_rate": 5e-06, "loss": 0.6216, "num_input_tokens_seen": 124149004, "step": 1981 }, { "epoch": 6.59234608985025, "loss": 0.33752793073654175, "loss_ce": 3.5264533835288603e-06, "loss_iou": 0.11279296875, "loss_num": 0.0224609375, "loss_xval": 0.337890625, "num_input_tokens_seen": 124149004, "step": 1981 }, { "epoch": 6.595673876871881, "grad_norm": 13.62070083618164, "learning_rate": 5e-06, "loss": 0.624, "num_input_tokens_seen": 124212744, "step": 1982 }, { "epoch": 6.595673876871881, "loss": 0.7718583345413208, "loss_ce": 0.0008622486493550241, "loss_iou": 0.26171875, "loss_num": 0.04931640625, "loss_xval": 0.76953125, "num_input_tokens_seen": 124212744, "step": 1982 }, { "epoch": 6.599001663893511, "grad_norm": 32.08122253417969, "learning_rate": 5e-06, "loss": 0.7776, "num_input_tokens_seen": 124275488, "step": 1983 }, { "epoch": 6.599001663893511, "loss": 0.7189164757728577, "loss_ce": 0.00028856488643214107, "loss_iou": 0.1982421875, "loss_num": 0.064453125, "loss_xval": 0.71875, "num_input_tokens_seen": 124275488, "step": 1983 }, { "epoch": 6.602329450915142, "grad_norm": 23.14647102355957, "learning_rate": 5e-06, "loss": 0.5029, "num_input_tokens_seen": 124336988, "step": 1984 }, { "epoch": 6.602329450915142, "loss": 0.5258368253707886, "loss_ce": 0.00020208263595122844, "loss_iou": 0.1767578125, "loss_num": 0.034423828125, "loss_xval": 0.52734375, "num_input_tokens_seen": 124336988, "step": 1984 }, { "epoch": 6.605657237936772, "grad_norm": 8.717888832092285, "learning_rate": 5e-06, "loss": 0.6094, "num_input_tokens_seen": 124398312, "step": 1985 }, { "epoch": 6.605657237936772, "loss": 0.942054033279419, "loss_ce": 0.0006478212890215218, "loss_iou": 0.2890625, "loss_num": 0.07275390625, "loss_xval": 0.94140625, "num_input_tokens_seen": 124398312, "step": 1985 }, { "epoch": 6.608985024958403, "grad_norm": 11.982193946838379, "learning_rate": 5e-06, "loss": 0.4813, "num_input_tokens_seen": 124461176, "step": 1986 }, { "epoch": 6.608985024958403, "loss": 0.5817021727561951, "loss_ce": 0.0013798931613564491, "loss_iou": 0.224609375, "loss_num": 0.0262451171875, "loss_xval": 0.58203125, "num_input_tokens_seen": 124461176, "step": 1986 }, { "epoch": 6.612312811980034, "grad_norm": 40.03938674926758, "learning_rate": 5e-06, "loss": 0.5984, "num_input_tokens_seen": 124524312, "step": 1987 }, { "epoch": 6.612312811980034, "loss": 0.7929661273956299, "loss_ce": 0.00024154715356417, "loss_iou": 0.27734375, "loss_num": 0.04736328125, "loss_xval": 0.79296875, "num_input_tokens_seen": 124524312, "step": 1987 }, { "epoch": 6.615640599001664, "grad_norm": 20.670000076293945, "learning_rate": 5e-06, "loss": 0.7356, "num_input_tokens_seen": 124586984, "step": 1988 }, { "epoch": 6.615640599001664, "loss": 0.534072995185852, "loss_ce": 0.000625723332632333, "loss_iou": 0.17578125, "loss_num": 0.03662109375, "loss_xval": 0.53515625, "num_input_tokens_seen": 124586984, "step": 1988 }, { "epoch": 6.618968386023295, "grad_norm": 13.344552993774414, "learning_rate": 5e-06, "loss": 0.7439, "num_input_tokens_seen": 124648496, "step": 1989 }, { "epoch": 6.618968386023295, "loss": 0.8475092053413391, "loss_ce": 0.0009515842539258301, "loss_iou": 0.28125, "loss_num": 0.056640625, "loss_xval": 0.84765625, "num_input_tokens_seen": 124648496, "step": 1989 }, { "epoch": 6.6222961730449255, "grad_norm": 17.5792179107666, "learning_rate": 5e-06, "loss": 0.5477, "num_input_tokens_seen": 124711420, "step": 1990 }, { "epoch": 6.6222961730449255, "loss": 0.6316379904747009, "loss_ce": 4.6201777877286077e-05, "loss_iou": 0.205078125, "loss_num": 0.044677734375, "loss_xval": 0.6328125, "num_input_tokens_seen": 124711420, "step": 1990 }, { "epoch": 6.625623960066556, "grad_norm": 11.981757164001465, "learning_rate": 5e-06, "loss": 0.5592, "num_input_tokens_seen": 124774256, "step": 1991 }, { "epoch": 6.625623960066556, "loss": 0.4685099422931671, "loss_ce": 4.079750851815334e-06, "loss_iou": 0.115234375, "loss_num": 0.0478515625, "loss_xval": 0.46875, "num_input_tokens_seen": 124774256, "step": 1991 }, { "epoch": 6.628951747088187, "grad_norm": 19.191776275634766, "learning_rate": 5e-06, "loss": 0.751, "num_input_tokens_seen": 124837888, "step": 1992 }, { "epoch": 6.628951747088187, "loss": 0.8945164084434509, "loss_ce": 0.0010838116286322474, "loss_iou": 0.34375, "loss_num": 0.041259765625, "loss_xval": 0.89453125, "num_input_tokens_seen": 124837888, "step": 1992 }, { "epoch": 6.632279534109817, "grad_norm": 15.3238525390625, "learning_rate": 5e-06, "loss": 0.7391, "num_input_tokens_seen": 124902180, "step": 1993 }, { "epoch": 6.632279534109817, "loss": 0.8709877729415894, "loss_ce": 0.0006264817784540355, "loss_iou": 0.337890625, "loss_num": 0.038818359375, "loss_xval": 0.87109375, "num_input_tokens_seen": 124902180, "step": 1993 }, { "epoch": 6.635607321131448, "grad_norm": 26.2684383392334, "learning_rate": 5e-06, "loss": 0.6892, "num_input_tokens_seen": 124964652, "step": 1994 }, { "epoch": 6.635607321131448, "loss": 0.5876778364181519, "loss_ce": 3.134192229481414e-05, "loss_iou": 0.185546875, "loss_num": 0.04345703125, "loss_xval": 0.5859375, "num_input_tokens_seen": 124964652, "step": 1994 }, { "epoch": 6.6389351081530785, "grad_norm": 15.246932029724121, "learning_rate": 5e-06, "loss": 0.5931, "num_input_tokens_seen": 125027128, "step": 1995 }, { "epoch": 6.6389351081530785, "loss": 0.5009132623672485, "loss_ce": 0.0004249872581567615, "loss_iou": 0.1728515625, "loss_num": 0.031005859375, "loss_xval": 0.5, "num_input_tokens_seen": 125027128, "step": 1995 }, { "epoch": 6.642262895174709, "grad_norm": 16.900373458862305, "learning_rate": 5e-06, "loss": 0.5838, "num_input_tokens_seen": 125090272, "step": 1996 }, { "epoch": 6.642262895174709, "loss": 0.6842046976089478, "loss_ce": 0.0001226711319759488, "loss_iou": 0.2265625, "loss_num": 0.04638671875, "loss_xval": 0.68359375, "num_input_tokens_seen": 125090272, "step": 1996 }, { "epoch": 6.64559068219634, "grad_norm": 11.600629806518555, "learning_rate": 5e-06, "loss": 0.5266, "num_input_tokens_seen": 125152640, "step": 1997 }, { "epoch": 6.64559068219634, "loss": 0.5774567127227783, "loss_ce": 3.054131411772687e-06, "loss_iou": 0.2216796875, "loss_num": 0.0269775390625, "loss_xval": 0.578125, "num_input_tokens_seen": 125152640, "step": 1997 }, { "epoch": 6.64891846921797, "grad_norm": 20.360536575317383, "learning_rate": 5e-06, "loss": 0.5939, "num_input_tokens_seen": 125213584, "step": 1998 }, { "epoch": 6.64891846921797, "loss": 0.7232832908630371, "loss_ce": 0.0008712068083696067, "loss_iou": 0.275390625, "loss_num": 0.0341796875, "loss_xval": 0.72265625, "num_input_tokens_seen": 125213584, "step": 1998 }, { "epoch": 6.652246256239601, "grad_norm": 8.929659843444824, "learning_rate": 5e-06, "loss": 0.5807, "num_input_tokens_seen": 125276516, "step": 1999 }, { "epoch": 6.652246256239601, "loss": 0.4460247755050659, "loss_ce": 0.00046813851804472506, "loss_iou": 0.16015625, "loss_num": 0.025146484375, "loss_xval": 0.4453125, "num_input_tokens_seen": 125276516, "step": 1999 }, { "epoch": 6.655574043261232, "grad_norm": 14.47555923461914, "learning_rate": 5e-06, "loss": 0.6939, "num_input_tokens_seen": 125338692, "step": 2000 }, { "epoch": 6.655574043261232, "eval_seeclick_CIoU": 0.1068805642426014, "eval_seeclick_GIoU": 0.11891250684857368, "eval_seeclick_IoU": 0.20469707250595093, "eval_seeclick_MAE_all": 0.16567593067884445, "eval_seeclick_MAE_h": 0.05109903775155544, "eval_seeclick_MAE_w": 0.12995515763759613, "eval_seeclick_MAE_x_boxes": 0.22886644303798676, "eval_seeclick_MAE_y_boxes": 0.13595783710479736, "eval_seeclick_NUM_probability": 0.9999660849571228, "eval_seeclick_inside_bbox": 0.22500000149011612, "eval_seeclick_loss": 2.713005304336548, "eval_seeclick_loss_ce": 0.11975685507059097, "eval_seeclick_loss_iou": 0.88720703125, "eval_seeclick_loss_num": 0.1656036376953125, "eval_seeclick_loss_xval": 2.6025390625, "eval_seeclick_runtime": 69.2988, "eval_seeclick_samples_per_second": 0.678, "eval_seeclick_steps_per_second": 0.029, "num_input_tokens_seen": 125338692, "step": 2000 }, { "epoch": 6.655574043261232, "eval_icons_CIoU": 0.03829660825431347, "eval_icons_GIoU": 0.1595398709177971, "eval_icons_IoU": 0.19770432263612747, "eval_icons_MAE_all": 0.16016975790262222, "eval_icons_MAE_h": 0.0871335044503212, "eval_icons_MAE_w": 0.15490344911813736, "eval_icons_MAE_x_boxes": 0.17506500333547592, "eval_icons_MAE_y_boxes": 0.0526757575571537, "eval_icons_NUM_probability": 0.9999957382678986, "eval_icons_inside_bbox": 0.3420138955116272, "eval_icons_loss": 2.462501287460327, "eval_icons_loss_ce": 1.5740541243758344e-06, "eval_icons_loss_iou": 0.8251953125, "eval_icons_loss_num": 0.1562976837158203, "eval_icons_loss_xval": 2.430908203125, "eval_icons_runtime": 71.6265, "eval_icons_samples_per_second": 0.698, "eval_icons_steps_per_second": 0.028, "num_input_tokens_seen": 125338692, "step": 2000 }, { "epoch": 6.655574043261232, "eval_screenspot_CIoU": 0.0693756896071136, "eval_screenspot_GIoU": 0.13485626379648843, "eval_screenspot_IoU": 0.21177987257639566, "eval_screenspot_MAE_all": 0.1937429408232371, "eval_screenspot_MAE_h": 0.06855036566654842, "eval_screenspot_MAE_w": 0.1509904464085897, "eval_screenspot_MAE_x_boxes": 0.2597081462542216, "eval_screenspot_MAE_y_boxes": 0.1477653905749321, "eval_screenspot_NUM_probability": 0.9999565482139587, "eval_screenspot_inside_bbox": 0.37583333253860474, "eval_screenspot_loss": 2.735307455062866, "eval_screenspot_loss_ce": 5.104218810932556e-05, "eval_screenspot_loss_iou": 0.8837890625, "eval_screenspot_loss_num": 0.20152791341145834, "eval_screenspot_loss_xval": 2.7766927083333335, "eval_screenspot_runtime": 131.5463, "eval_screenspot_samples_per_second": 0.677, "eval_screenspot_steps_per_second": 0.023, "num_input_tokens_seen": 125338692, "step": 2000 }, { "epoch": 6.655574043261232, "eval_compot_CIoU": 0.005251538008451462, "eval_compot_GIoU": 0.10733498632907867, "eval_compot_IoU": 0.17231258749961853, "eval_compot_MAE_all": 0.19772262126207352, "eval_compot_MAE_h": 0.05648845434188843, "eval_compot_MAE_w": 0.24372312426567078, "eval_compot_MAE_x_boxes": 0.20571433007717133, "eval_compot_MAE_y_boxes": 0.10716105252504349, "eval_compot_NUM_probability": 0.9999851286411285, "eval_compot_inside_bbox": 0.3229166716337204, "eval_compot_loss": 2.8182108402252197, "eval_compot_loss_ce": 0.0029962222324684262, "eval_compot_loss_iou": 0.91943359375, "eval_compot_loss_num": 0.20189666748046875, "eval_compot_loss_xval": 2.84716796875, "eval_compot_runtime": 78.1032, "eval_compot_samples_per_second": 0.64, "eval_compot_steps_per_second": 0.026, "num_input_tokens_seen": 125338692, "step": 2000 }, { "epoch": 6.655574043261232, "eval_custom_ui_MAE_all": 0.07614851370453835, "eval_custom_ui_MAE_x": 0.07330591231584549, "eval_custom_ui_MAE_y": 0.0789911113679409, "eval_custom_ui_NUM_probability": 0.9999920725822449, "eval_custom_ui_loss": 0.3671107590198517, "eval_custom_ui_loss_ce": 1.62217548904664e-06, "eval_custom_ui_loss_num": 0.0698394775390625, "eval_custom_ui_loss_xval": 0.34930419921875, "eval_custom_ui_runtime": 54.0842, "eval_custom_ui_samples_per_second": 0.924, "eval_custom_ui_steps_per_second": 0.037, "num_input_tokens_seen": 125338692, "step": 2000 } ], "logging_steps": 1.0, "max_steps": 15000, "num_input_tokens_seen": 125338692, "num_train_epochs": 50, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.830993252785848e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }